From a59f4e079d19464eebb9b06513a1d4f55fdae5ba Mon Sep 17 00:00:00 2001 From: Zhu Yanhai Date: Tue, 8 Jan 2013 12:56:52 +0800 Subject: sched: Fix the broken sched_rr_get_interval() The caller of sched_sliced() should pass se.cfs_rq and se as the arguments, however in sched_rr_get_interval() we gave it rq.cfs_rq and se, which made the following computation obviously wrong. The change was introduced by commit: 77034937dc45 sched: fix crash in sys_sched_rr_get_interval() ... 5 years ago, while it had been the correct 'cfs_rq_of' before the commit. The change seems to be irrelevant to the commit msg, which was to return a 0 timeslice for tasks that are on an idle runqueue. So I believe that was just a plain typo. Signed-off-by: Zhu Yanhai Cc: Peter Zijlstra Cc: Paul Turner Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Andrew Morton Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1357621012-15039-1-git-send-email-gaoyang.zyh@taobao.com [ Since this is an ABI and an old bug, we'll test this via a slow upstream route, to hopefully discover any app breakage. ] Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5eea8707234a..a7a19ffc3b7e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6101,7 +6101,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task * idle runqueue: */ if (rq->cfs.load.weight) - rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); + rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se)); return rr_interval; } -- cgit v1.2.3 From 1158ddb55416855fd17abe3214298f736f00426a Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 23 Nov 2012 00:02:15 +0400 Subject: sched/rt: Add reschedule check to switched_from_rt() Reschedule rq->curr if the first RT task has just been pulled to the rq. Signed-off-by: Kirill V Tkhai Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Tkhai Kirill Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/118761353614535@web28f.yandex.ru Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 418feb01344e..29bda5bdf2a5 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1889,8 +1889,11 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) * we may need to handle the pulling of RT tasks * now. */ - if (p->on_rq && !rq->rt.rt_nr_running) - pull_rt_task(rq); + if (!p->on_rq || rq->rt.rt_nr_running) + return; + + if (pull_rt_task(rq)) + resched_task(rq->curr); } void init_sched_rt_class(void) -- cgit v1.2.3 From 16c8f1c72ece3871a6c93003cd888fc2d003a7eb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 8 Nov 2012 13:33:46 +0530 Subject: sched/fair: Set se->vruntime directly in place_entity() We are first storing the new vruntime in a variable and then storing it in se->vruntime. Simply update se->vruntime directly. Signed-off-by: Viresh Kumar Cc: linaro-dev@lists.linaro.org Cc: patches@linaro.org Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/ae59db1945518d6f6250920d46eb1f1a9cc0024e.1352361704.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a7a19ffc3b7e..8dbee9f4ceb2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1680,9 +1680,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) } /* ensure we never gain time by being placed backwards. */ - vruntime = max_vruntime(se->vruntime, vruntime); - - se->vruntime = vruntime; + se->vruntime = max_vruntime(se->vruntime, vruntime); } static void check_enqueue_throttle(struct cfs_rq *cfs_rq); -- cgit v1.2.3 From 57d2aa00dcec67afa52478730f2b524521af14fb Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 17 Jul 2012 15:03:43 +0800 Subject: sched/rt: Avoid updating RT entry timeout twice within one tick period The issue below was found in 2.6.34-rt rather than mainline rt kernel, but the issue still exists upstream as well. So please let me describe how it was noticed on 2.6.34-rt: On this version, each softirq has its own thread, it means there is at least one RT FIFO task per cpu. The priority of these tasks is set to 49 by default. If user launches an RT FIFO task with priority lower than 49 of softirq RT tasks, it's possible there are two RT FIFO tasks enqueued one cpu runqueue at one moment. By current strategy of balancing RT tasks, when it comes to RT tasks, we really need to put them off to a CPU that they can run on as soon as possible. Even if it means a bit of cache line flushing, we want RT tasks to be run with the least latency. When the user RT FIFO task which just launched before is running, the sched timer tick of the current cpu happens. In this tick period, the timeout value of the user RT task will be updated once. Subsequently, we try to wake up one softirq RT task on its local cpu. As the priority of current user RT task is lower than the softirq RT task, the current task will be preempted by the higher priority softirq RT task. Before preemption, we check to see if current can readily move to a different cpu. If so, we will reschedule to allow the RT push logic to try to move current somewhere else. Whenever the woken softirq RT task runs, it first tries to migrate the user FIFO RT task over to a cpu that is running a task of lesser priority. If migration is done, it will send a reschedule request to the found cpu by IPI interrupt. Once the target cpu responds the IPI interrupt, it will pick the migrated user RT task to preempt its current task. When the user RT task is running on the new cpu, the sched timer tick of the cpu fires. So it will tick the user RT task again. This also means the RT task timeout value will be updated again. As the migration may be done in one tick period, it means the user RT task timeout value will be updated twice within one tick. If we set a limit on the amount of cpu time for the user RT task by setrlimit(RLIMIT_RTTIME), the SIGXCPU signal should be posted upon reaching the soft limit. But exactly when the SIGXCPU signal should be sent depends on the RT task timeout value. In fact the timeout mechanism of sending the SIGXCPU signal assumes the RT task timeout is increased once every tick. However, currently the timeout value may be added twice per tick. So it results in the SIGXCPU signal being sent earlier than expected. To solve this issue, we prevent the timeout value from increasing twice within one tick time by remembering the jiffies value of last updating the timeout. As long as the RT task's jiffies is different with the global jiffies value, we allow its timeout to be updated. Signed-off-by: Ying Xue Signed-off-by: Fan Du Reviewed-by: Yong Zhang Acked-by: Steven Rostedt Cc: Link: http://lkml.kernel.org/r/1342508623-2887-1-git-send-email-ying.xue@windriver.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched/rt.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d2112477ff5e..924e42a8df58 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1208,6 +1208,7 @@ struct sched_entity { struct sched_rt_entity { struct list_head run_list; unsigned long timeout; + unsigned long watchdog_stamp; unsigned int time_slice; struct sched_rt_entity *back; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 29bda5bdf2a5..2f69ca997826 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1988,7 +1988,11 @@ static void watchdog(struct rq *rq, struct task_struct *p) if (soft != RLIM_INFINITY) { unsigned long next; - p->rt.timeout++; + if (p->rt.watchdog_stamp != jiffies) { + p->rt.timeout++; + p->rt.watchdog_stamp = jiffies; + } + next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); if (p->rt.timeout > next) p->cputime_expires.sched_exp = p->se.sum_exec_runtime; -- cgit v1.2.3 From 95a79fd458b85132c25e351d45037ec9643312b2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 7 Jan 2013 18:12:14 +0100 Subject: context_tracking: Export context state for generic vtime Export the context state: whether we run in user / kernel from the context tracking subsystem point of view. This is going to be used by the generic virtual cputime accounting subsystem that is needed to implement the full dynticks. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/context_tracking.h | 28 ++++++++++++++++++++++++++++ kernel/context_tracking.c | 16 +--------------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index e24339ccb7f0..b28d161c1091 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -3,12 +3,40 @@ #ifdef CONFIG_CONTEXT_TRACKING #include +#include + +struct context_tracking { + /* + * When active is false, probes are unset in order + * to minimize overhead: TIF flags are cleared + * and calls to user_enter/exit are ignored. This + * may be further optimized using static keys. + */ + bool active; + enum { + IN_KERNEL = 0, + IN_USER, + } state; +}; + +DECLARE_PER_CPU(struct context_tracking, context_tracking); + +static inline bool context_tracking_in_user(void) +{ + return __this_cpu_read(context_tracking.state) == IN_USER; +} + +static inline bool context_tracking_active(void) +{ + return __this_cpu_read(context_tracking.active); +} extern void user_enter(void); extern void user_exit(void); extern void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next); #else +static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } static inline void user_exit(void) { } static inline void context_tracking_task_switch(struct task_struct *prev, diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index e0e07fd55508..54f471e536dc 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -1,24 +1,10 @@ #include #include #include -#include #include -struct context_tracking { - /* - * When active is false, hooks are not set to - * minimize overhead: TIF flags are cleared - * and calls to user_enter/exit are ignored. This - * may be further optimized using static keys. - */ - bool active; - enum { - IN_KERNEL = 0, - IN_USER, - } state; -}; -static DEFINE_PER_CPU(struct context_tracking, context_tracking) = { +DEFINE_PER_CPU(struct context_tracking, context_tracking) = { #ifdef CONFIG_CONTEXT_TRACKING_FORCE .active = true, #endif -- cgit v1.2.3 From 62188451f0d63add7ad0cd2a1ae269d600c1663d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 26 Jan 2013 17:19:42 +0100 Subject: cputime: Avoid multiplication overflow on utime scaling We scale stime, utime values based on rtime (sum_exec_runtime converted to jiffies). During scaling we multiple rtime * utime, which seems to be fine, since both values are converted to u64, but it's not. Let assume HZ is 1000 - 1ms tick. Process consist of 64 threads, run for 1 day, threads utilize 100% cpu on user space. Machine has 64 cpus. Process rtime = utime will be 64 * 24 * 60 * 60 * 1000 jiffies, which is 0x149970000. Multiplication rtime * utime result is 0x1a855771100000000, which can not be covered in 64 bits. Result of overflow is stall of utime values visible in user space (prev_utime in kernel), even if application still consume lot of CPU time. A solution to solve this is to perform the multiplication on stime instead of utime. It's easy to grow the utime value fast with a CPU bound thread in userspace for example. Now we assume that doing so with stime is much harder. In most cases a task shouldn't ever spend much time in kernel space as it tends to sleep waiting for jobs completion when they take long to achieve. IO is the typical example of that. Hence scaling the cputime by performing the multiplication on stime instead of utime should considerably reduce the chances of an overflow on most workloads. This is largely inspired by a patch from Stanislaw Gruszka: http://lkml.kernel.org/r/20130107113144.GA7544@redhat.com Inspired-by: Stanislaw Gruszka Reported-by: Stanislaw Gruszka Acked-by: Stanislaw Gruszka Signed-off-by: Frederic Weisbecker Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Andrew Morton Link: http://lkml.kernel.org/r/1359217182-25184-1-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/cputime.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 293b202fcf79..825a956ccdb6 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -509,11 +509,11 @@ EXPORT_SYMBOL_GPL(vtime_account); # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) #endif -static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) +static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total) { u64 temp = (__force u64) rtime; - temp *= (__force u64) utime; + temp *= (__force u64) stime; if (sizeof(cputime_t) == 4) temp = div_u64(temp, (__force u32) total); @@ -531,10 +531,10 @@ static void cputime_adjust(struct task_cputime *curr, struct cputime *prev, cputime_t *ut, cputime_t *st) { - cputime_t rtime, utime, total; + cputime_t rtime, stime, total; - utime = curr->utime; - total = utime + curr->stime; + stime = curr->stime; + total = stime + curr->utime; /* * Tick based cputime accounting depend on random scheduling @@ -549,17 +549,17 @@ static void cputime_adjust(struct task_cputime *curr, rtime = nsecs_to_cputime(curr->sum_exec_runtime); if (total) - utime = scale_utime(utime, rtime, total); + stime = scale_stime(stime, rtime, total); else - utime = rtime; + stime = rtime; /* * If the tick based count grows faster than the scheduler one, * the result of the scaling may go backward. * Let's enforce monotonicity. */ - prev->utime = max(prev->utime, utime); - prev->stime = max(prev->stime, rtime - prev->utime); + prev->stime = max(prev->stime, stime); + prev->utime = max(prev->utime, rtime - prev->stime); *ut = prev->utime; *st = prev->stime; -- cgit v1.2.3 From 39613766e8826756a4bf8c74274eb971c3c1d929 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 15 Jan 2013 13:25:07 +0100 Subject: cputime: Librarize per nsecs resolution cputime definitions The full dynticks cputime accounting that we'll soon introduce will rely on sched_clock(). And its clock can have a per nanosecond granularity. To prepare for this, we need to have a cputime_t implementation that has this precision. ia64 virtual cputime accounting already uses that granularity so all we need is to librarize its implementation in the asm generic headers. Also librarize the default per jiffy granularity cputime_t as well so that we can easily pick either implementation depending on the cputime accounting config we choose. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Cc: Fenghua Yu --- arch/ia64/include/asm/cputime.h | 88 ++------------------------------ include/asm-generic/cputime.h | 62 +--------------------- include/asm-generic/cputime_jiffies.h | 66 ++++++++++++++++++++++++ include/asm-generic/cputime_nsecs.h | 96 +++++++++++++++++++++++++++++++++++ 4 files changed, 167 insertions(+), 145 deletions(-) create mode 100644 include/asm-generic/cputime_jiffies.h create mode 100644 include/asm-generic/cputime_nsecs.h diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 7fcf7f08ab06..040b31638001 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -19,91 +19,11 @@ #define __IA64_CPUTIME_H #ifndef CONFIG_VIRT_CPU_ACCOUNTING -#include +# include #else - -#include -#include -#include - -typedef u64 __nocast cputime_t; -typedef u64 __nocast cputime64_t; - -#define cputime_one_jiffy jiffies_to_cputime(1) - -/* - * Convert cputime <-> jiffies (HZ) - */ -#define cputime_to_jiffies(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) -#define jiffies_to_cputime(__jif) \ - (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) -#define cputime64_to_jiffies64(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) -#define jiffies64_to_cputime64(__jif) \ - (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) - -/* - * Convert cputime <-> microseconds - */ -#define cputime_to_usecs(__ct) \ - ((__force u64)(__ct) / NSEC_PER_USEC) -#define usecs_to_cputime(__usecs) \ - (__force cputime_t)((__usecs) * NSEC_PER_USEC) -#define usecs_to_cputime64(__usecs) \ - (__force cputime64_t)((__usecs) * NSEC_PER_USEC) - -/* - * Convert cputime <-> seconds - */ -#define cputime_to_secs(__ct) \ - ((__force u64)(__ct) / NSEC_PER_SEC) -#define secs_to_cputime(__secs) \ - (__force cputime_t)((__secs) * NSEC_PER_SEC) - -/* - * Convert cputime <-> timespec (nsec) - */ -static inline cputime_t timespec_to_cputime(const struct timespec *val) -{ - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; - return (__force cputime_t) ret; -} -static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) -{ - val->tv_sec = (__force u64) ct / NSEC_PER_SEC; - val->tv_nsec = (__force u64) ct % NSEC_PER_SEC; -} - -/* - * Convert cputime <-> timeval (msec) - */ -static inline cputime_t timeval_to_cputime(struct timeval *val) -{ - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; - return (__force cputime_t) ret; -} -static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) -{ - val->tv_sec = (__force u64) ct / NSEC_PER_SEC; - val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC; -} - -/* - * Convert cputime <-> clock (USER_HZ) - */ -#define cputime_to_clock_t(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ)) -#define clock_t_to_cputime(__x) \ - (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ)) - -/* - * Convert cputime64 to clock. - */ -#define cputime64_to_clock_t(__ct) \ - cputime_to_clock_t((__force cputime_t)__ct) - +# include +# include extern void arch_vtime_task_switch(struct task_struct *tsk); - #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + #endif /* __IA64_CPUTIME_H */ diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index 9a62937c56ca..c6eddf50eaf9 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -4,66 +4,6 @@ #include #include -typedef unsigned long __nocast cputime_t; - -#define cputime_one_jiffy jiffies_to_cputime(1) -#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct) -#define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz) - -typedef u64 __nocast cputime64_t; - -#define cputime64_to_jiffies64(__ct) (__force u64)(__ct) -#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) - -#define nsecs_to_cputime64(__ct) \ - jiffies64_to_cputime64(nsecs_to_jiffies64(__ct)) - - -/* - * Convert cputime to microseconds and back. - */ -#define cputime_to_usecs(__ct) \ - jiffies_to_usecs(cputime_to_jiffies(__ct)) -#define usecs_to_cputime(__usec) \ - jiffies_to_cputime(usecs_to_jiffies(__usec)) -#define usecs_to_cputime64(__usec) \ - jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000)) - -/* - * Convert cputime to seconds and back. - */ -#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ) -#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ) - -/* - * Convert cputime to timespec and back. - */ -#define timespec_to_cputime(__val) \ - jiffies_to_cputime(timespec_to_jiffies(__val)) -#define cputime_to_timespec(__ct,__val) \ - jiffies_to_timespec(cputime_to_jiffies(__ct),__val) - -/* - * Convert cputime to timeval and back. - */ -#define timeval_to_cputime(__val) \ - jiffies_to_cputime(timeval_to_jiffies(__val)) -#define cputime_to_timeval(__ct,__val) \ - jiffies_to_timeval(cputime_to_jiffies(__ct),__val) - -/* - * Convert cputime to clock and back. - */ -#define cputime_to_clock_t(__ct) \ - jiffies_to_clock_t(cputime_to_jiffies(__ct)) -#define clock_t_to_cputime(__x) \ - jiffies_to_cputime(clock_t_to_jiffies(__x)) - -/* - * Convert cputime64 to clock. - */ -#define cputime64_to_clock_t(__ct) \ - jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct)) +#include #endif diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h new file mode 100644 index 000000000000..b747f1fac2d2 --- /dev/null +++ b/include/asm-generic/cputime_jiffies.h @@ -0,0 +1,66 @@ +#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H +#define _ASM_GENERIC_CPUTIME_JIFFIES_H + +typedef unsigned long __nocast cputime_t; + +#define cputime_one_jiffy jiffies_to_cputime(1) +#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct) +#define cputime_to_scaled(__ct) (__ct) +#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz) + +typedef u64 __nocast cputime64_t; + +#define cputime64_to_jiffies64(__ct) (__force u64)(__ct) +#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) + +#define nsecs_to_cputime64(__ct) \ + jiffies64_to_cputime64(nsecs_to_jiffies64(__ct)) + + +/* + * Convert cputime to microseconds and back. + */ +#define cputime_to_usecs(__ct) \ + jiffies_to_usecs(cputime_to_jiffies(__ct)) +#define usecs_to_cputime(__usec) \ + jiffies_to_cputime(usecs_to_jiffies(__usec)) +#define usecs_to_cputime64(__usec) \ + jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000)) + +/* + * Convert cputime to seconds and back. + */ +#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ) +#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ) + +/* + * Convert cputime to timespec and back. + */ +#define timespec_to_cputime(__val) \ + jiffies_to_cputime(timespec_to_jiffies(__val)) +#define cputime_to_timespec(__ct,__val) \ + jiffies_to_timespec(cputime_to_jiffies(__ct),__val) + +/* + * Convert cputime to timeval and back. + */ +#define timeval_to_cputime(__val) \ + jiffies_to_cputime(timeval_to_jiffies(__val)) +#define cputime_to_timeval(__ct,__val) \ + jiffies_to_timeval(cputime_to_jiffies(__ct),__val) + +/* + * Convert cputime to clock and back. + */ +#define cputime_to_clock_t(__ct) \ + jiffies_to_clock_t(cputime_to_jiffies(__ct)) +#define clock_t_to_cputime(__x) \ + jiffies_to_cputime(clock_t_to_jiffies(__x)) + +/* + * Convert cputime64 to clock. + */ +#define cputime64_to_clock_t(__ct) \ + jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct)) + +#endif diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h new file mode 100644 index 000000000000..c73d182f4751 --- /dev/null +++ b/include/asm-generic/cputime_nsecs.h @@ -0,0 +1,96 @@ +/* + * Definitions for measuring cputime in nsecs resolution. + * + * Based on + * + * Copyright (C) 2007 FUJITSU LIMITED + * Copyright (C) 2007 Hidetoshi Seto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _ASM_GENERIC_CPUTIME_NSECS_H +#define _ASM_GENERIC_CPUTIME_NSECS_H + +typedef u64 __nocast cputime_t; +typedef u64 __nocast cputime64_t; + +#define cputime_one_jiffy jiffies_to_cputime(1) + +/* + * Convert cputime <-> jiffies (HZ) + */ +#define cputime_to_jiffies(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) +#define jiffies_to_cputime(__jif) \ + (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) +#define cputime64_to_jiffies64(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) +#define jiffies64_to_cputime64(__jif) \ + (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) + +/* + * Convert cputime <-> microseconds + */ +#define cputime_to_usecs(__ct) \ + ((__force u64)(__ct) / NSEC_PER_USEC) +#define usecs_to_cputime(__usecs) \ + (__force cputime_t)((__usecs) * NSEC_PER_USEC) +#define usecs_to_cputime64(__usecs) \ + (__force cputime64_t)((__usecs) * NSEC_PER_USEC) + +/* + * Convert cputime <-> seconds + */ +#define cputime_to_secs(__ct) \ + ((__force u64)(__ct) / NSEC_PER_SEC) +#define secs_to_cputime(__secs) \ + (__force cputime_t)((__secs) * NSEC_PER_SEC) + +/* + * Convert cputime <-> timespec (nsec) + */ +static inline cputime_t timespec_to_cputime(const struct timespec *val) +{ + u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; + return (__force cputime_t) ret; +} +static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) +{ + val->tv_sec = (__force u64) ct / NSEC_PER_SEC; + val->tv_nsec = (__force u64) ct % NSEC_PER_SEC; +} + +/* + * Convert cputime <-> timeval (msec) + */ +static inline cputime_t timeval_to_cputime(struct timeval *val) +{ + u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; + return (__force cputime_t) ret; +} +static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) +{ + val->tv_sec = (__force u64) ct / NSEC_PER_SEC; + val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC; +} + +/* + * Convert cputime <-> clock (USER_HZ) + */ +#define cputime_to_clock_t(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ)) +#define clock_t_to_cputime(__x) \ + (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ)) + +/* + * Convert cputime64 to clock. + */ +#define cputime64_to_clock_t(__ct) \ + cputime_to_clock_t((__force cputime_t)__ct) + +#endif -- cgit v1.2.3 From ae8dda5c473bf1a85913942adcaac449e5754bf3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Jan 2013 18:02:04 +0100 Subject: cputime: Move default nsecs_to_cputime() to jiffies based cputime file If the architecture doesn't provide an implementation of nsecs_to_cputime(), the cputime accounting core uses a default one that converts the nanoseconds to jiffies. However this only makes sense if we use the jiffies based cputime. For now it doesn't matter much because this API is only called on code that uses jiffies based cputime accounting. But the code may evolve and this API may be used more broadly in the future. Keeping this default implementation around is very error prone as it may introduce a bug and hide it on architectures that don't override this API. Fix this by moving this definition to the jiffies based cputime headers as it is the only place where it belongs to. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/asm-generic/cputime_jiffies.h | 10 ++++++++-- kernel/sched/cputime.c | 4 ---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h index b747f1fac2d2..272ecba9f588 100644 --- a/include/asm-generic/cputime_jiffies.h +++ b/include/asm-generic/cputime_jiffies.h @@ -13,8 +13,14 @@ typedef u64 __nocast cputime64_t; #define cputime64_to_jiffies64(__ct) (__force u64)(__ct) #define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) -#define nsecs_to_cputime64(__ct) \ - jiffies64_to_cputime64(nsecs_to_jiffies64(__ct)) + +/* + * Convert nanoseconds to cputime + */ +#define nsecs_to_cputime64(__nsec) \ + jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec)) +#define nsecs_to_cputime(__nsec) \ + jiffies_to_cputime(nsecs_to_jiffies(__nsec)) /* diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 293b202fcf79..5849448b981e 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -505,10 +505,6 @@ EXPORT_SYMBOL_GPL(vtime_account); #else -#ifndef nsecs_to_cputime -# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) -#endif - static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) { u64 temp = (__force u64) rtime; -- cgit v1.2.3 From abf917cd91cbb73952758f9741e2fa65002a48ee Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Jul 2012 07:56:04 +0200 Subject: cputime: Generic on-demand virtual cputime accounting If we want to stop the tick further idle, we need to be able to account the cputime without using the tick. Virtual based cputime accounting solves that problem by hooking into kernel/user boundaries. However implementing CONFIG_VIRT_CPU_ACCOUNTING require low level hooks and involves more overhead. But we already have a generic context tracking subsystem that is required for RCU needs by archs which plan to shut down the tick outside idle. This patch implements a generic virtual based cputime accounting that relies on these generic kernel/user hooks. There are some upsides of doing this: - This requires no arch code to implement CONFIG_VIRT_CPU_ACCOUNTING if context tracking is already built (already necessary for RCU in full tickless mode). - We can rely on the generic context tracking subsystem to dynamically (de)activate the hooks, so that we can switch anytime between virtual and tick based accounting. This way we don't have the overhead of the virtual accounting when the tick is running periodically. And one downside: - There is probably more overhead than a native virtual based cputime accounting. But this relies on hooks that are already set anyway. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- arch/ia64/include/asm/cputime.h | 6 +-- arch/ia64/include/asm/thread_info.h | 4 +- arch/ia64/include/asm/xen/minstate.h | 2 +- arch/ia64/kernel/asm-offsets.c | 2 +- arch/ia64/kernel/entry.S | 16 ++++---- arch/ia64/kernel/fsys.S | 4 +- arch/ia64/kernel/head.S | 4 +- arch/ia64/kernel/ivt.S | 8 ++-- arch/ia64/kernel/minstate.h | 2 +- arch/ia64/kernel/time.c | 4 +- arch/powerpc/configs/chroma_defconfig | 2 +- arch/powerpc/configs/corenet64_smp_defconfig | 2 +- arch/powerpc/configs/pasemi_defconfig | 2 +- arch/powerpc/include/asm/cputime.h | 6 +-- arch/powerpc/include/asm/lppaca.h | 2 +- arch/powerpc/include/asm/ppc_asm.h | 4 +- arch/powerpc/kernel/entry_64.S | 4 +- arch/powerpc/kernel/time.c | 4 +- arch/powerpc/platforms/pseries/dtl.c | 6 +-- arch/powerpc/platforms/pseries/setup.c | 6 +-- include/asm-generic/cputime.h | 8 +++- include/asm-generic/cputime_nsecs.h | 8 ++++ include/linux/kernel_stat.h | 2 +- include/linux/vtime.h | 16 ++++++++ init/Kconfig | 23 ++++++++++- kernel/context_tracking.c | 6 ++- kernel/sched/cputime.c | 61 ++++++++++++++++++++++++++-- 27 files changed, 160 insertions(+), 54 deletions(-) diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 040b31638001..e2d3f5baf265 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -11,19 +11,19 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec. + * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec. * Otherwise we measure cpu time in jiffies using the generic definitions. */ #ifndef __IA64_CPUTIME_H #define __IA64_CPUTIME_H -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE # include #else # include # include extern void arch_vtime_task_switch(struct task_struct *tsk); -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __IA64_CPUTIME_H */ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index ff2ae4136584..020d655ed082 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -31,7 +31,7 @@ struct thread_info { mm_segment_t addr_limit; /* user-level address space limit */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ struct restart_block restart_block; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE __u64 ac_stamp; __u64 ac_leave; __u64 ac_stime; @@ -69,7 +69,7 @@ struct thread_info { #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define setup_thread_stack(p, org) \ *task_thread_info(p) = *task_thread_info(org); \ task_thread_info(p)->ac_stime = 0; \ diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h index c57fa910f2c9..00cf03e0cb82 100644 --- a/arch/ia64/include/asm/xen/minstate.h +++ b/arch/ia64/include/asm/xen/minstate.h @@ -1,5 +1,5 @@ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* read ar.itc in advance, and use it before leaving bank 0 */ #define XEN_ACCOUNT_GET_STAMP \ MOV_FROM_ITC(pUStk, p6, r20, r2); diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index a48bd9a9927b..46c9e3007315 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -41,7 +41,7 @@ void foo(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp)); DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave)); DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime)); diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 6bfd8429ee0f..7a53530f22c2 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall) #endif .global __paravirt_work_processed_syscall; __paravirt_work_processed_syscall: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE adds r2=PT(LOADRS)+16,r12 MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 @@ -762,7 +762,7 @@ __paravirt_work_processed_syscall: ld8 r29=[r2],16 // M0|1 load cr.ipsr ld8 r28=[r3],16 // M0|1 load cr.iip -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 ;; ld8 r30=[r2],16 // M0|1 load cr.ifs @@ -793,7 +793,7 @@ __paravirt_work_processed_syscall: ld8.fill r1=[r3],16 // M0|1 load r1 (pUStk) mov r17=1 // A ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) st1 [r15]=r17 // M2|3 #else (pUStk) st1 [r14]=r17 // M2|3 @@ -813,7 +813,7 @@ __paravirt_work_processed_syscall: shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition COVER // B add current frame into dirty partition & set cr.ifs ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE mov r19=ar.bsp // M2 get new backing store pointer st8 [r14]=r22 // M save time at leave mov f10=f0 // F clear f10 @@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) adds r16=PT(CR_IPSR)+16,r12 adds r17=PT(CR_IIP)+16,r12 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE .pred.rel.mutex pUStk,pKStk MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave @@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ;; ld8.fill r12=[r16],16 ld8.fill r13=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 #else (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 @@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ;; ld8 r20=[r16],16 // ar.fpsr ld8.fill r15=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred #endif ;; @@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ld8.fill r2=[r17] (pUStk) mov r17=1 ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; // mib : mov add br -> mib : ld8 add br // bbb_ : br nop cover;; mbb_ : mov br cover;; diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index e662f178b990..c4cd45d97749 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down) nop.i 0 ;; mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting #else nop.m 0 @@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down) cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 br.call.sptk.many b7=ia64_syscall_setup // B ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mov.m r30=ar.itc is called in advance add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 4738ff7bd66a..9be4e497f3d3 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock) sched_clock = ia64_native_sched_clock #endif -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE GLOBAL_ENTRY(cycle_to_cputime) alloc r16=ar.pfs,1,0,0,0 addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 @@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime) shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT br.ret.sptk.many rp END(cycle_to_cputime) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_IA64_BRL_EMU diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index fa25689fc453..689ffcaa284e 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -784,7 +784,7 @@ ENTRY(break_fault) (p8) adds r28=16,r28 // A switch cr.iip to next bundle (p9) adds r8=1,r8 // A increment ei to next slot -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE ;; mov b6=r30 // I0 setup syscall handler branch reg early #else @@ -801,7 +801,7 @@ ENTRY(break_fault) // /////////////////////////////////////////////////////////////////////// st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting #else mov b6=r30 // I0 setup syscall handler branch reg early @@ -817,7 +817,7 @@ ENTRY(break_fault) cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? br.call.sptk.many b7=ia64_syscall_setup // B 1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mov.m r30=ar.itc is called in advance, and r13 is current add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A @@ -1043,7 +1043,7 @@ END(ia64_syscall_setup) DBG_FAULT(16) FAULT(16) -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) /* * There is no particular reason for this code to be here, other than * that there happens to be space here that would go unused otherwise. diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index d56753a11636..cc82a7d744c9 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -4,7 +4,7 @@ #include "entry.h" #include "paravirt_inst.h" -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* read ar.itc in advance, and use it before leaving bank 0 */ #define ACCOUNT_GET_STAMP \ (pUStk) mov.m r20=ar.itc; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 88a794536bc0..a3a3f5a1cb3a 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = { }; static struct clocksource *itc_clocksource; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include @@ -142,7 +142,7 @@ void vtime_account_idle(struct task_struct *tsk) account_idle_time(vtime_delta(tsk)); } -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static irqreturn_t timer_interrupt (int irq, void *dev_id) diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig index 29bb11ec6c64..4f35fc462385 100644 --- a/arch/powerpc/configs/chroma_defconfig +++ b/arch/powerpc/configs/chroma_defconfig @@ -1,6 +1,6 @@ CONFIG_PPC64=y CONFIG_PPC_BOOK3E_64=y -# CONFIG_VIRT_CPU_ACCOUNTING is not set +# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set CONFIG_SMP=y CONFIG_NR_CPUS=256 CONFIG_EXPERIMENTAL=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 88fa5c46f66f..f7df8362911f 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -1,6 +1,6 @@ CONFIG_PPC64=y CONFIG_PPC_BOOK3E_64=y -# CONFIG_VIRT_CPU_ACCOUNTING is not set +# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_EXPERIMENTAL=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 840a2c2d0430..bcedeea0df89 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -1,6 +1,6 @@ CONFIG_PPC64=y CONFIG_ALTIVEC=y -# CONFIG_VIRT_CPU_ACCOUNTING is not set +# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_EXPERIMENTAL=y diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 483733bd06d4..607559ab271f 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in + * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in * the same units as the timebase. Otherwise we measure cpu time * in jiffies using the generic definitions. */ @@ -16,7 +16,7 @@ #ifndef __POWERPC_CPUTIME_H #define __POWERPC_CPUTIME_H -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include #ifdef __KERNEL__ static inline void setup_cputime_one_jiffy(void) { } @@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk) static inline void arch_vtime_task_switch(struct task_struct *tsk) { } #endif /* __KERNEL__ */ -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __POWERPC_CPUTIME_H */ diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 531fe0c3108f..b1e7f2af1016 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -145,7 +145,7 @@ struct dtl_entry { extern struct kmem_cache *dtl_cache; /* - * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls + * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls * reading from the dispatch trace log. If other code wants to consume * DTL entries, it can set this pointer to a function that will get * called once for each DTL entry that gets processed. diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index ea2a86e8ff95..2d0e1f5d8339 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -24,7 +24,7 @@ * user_time and system_time fields in the paca. */ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define ACCOUNT_CPU_USER_ENTRY(ra, rb) #define ACCOUNT_CPU_USER_EXIT(ra, rb) #define ACCOUNT_STOLEN_TIME @@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #endif /* CONFIG_PPC_SPLPAR */ -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ /* * Macros for storing registers into and loading registers from diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index b310a0573625..a0ca42fb1541 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -94,7 +94,7 @@ system_call_common: addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) BEGIN_FW_FTR_SECTION beq 33f /* if from user, see if there are any DTL entries to process */ @@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION addi r9,r1,STACK_FRAME_OVERHEAD 33: END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */ /* * A syscall should always be called with interrupts enabled diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 6f6b1cccc916..22c9b67f9983 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; EXPORT_SYMBOL_GPL(ppc_tb_freq); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Factors for converting from cputime_t (timebase ticks) to * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). @@ -377,7 +377,7 @@ void vtime_account_user(struct task_struct *tsk) account_user_time(tsk, utime, utimescaled); } -#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ +#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #define calc_cputime_factors() #endif diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index a7648543c59e..0cc0ac07a55d 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7; */ static int dtl_buf_entries = N_DISPATCH_LOG; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE struct dtl_ring { u64 write_index; struct dtl_entry *write_ptr; @@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl) return per_cpu(dtl_rings, dtl->cpu).write_index; } -#else /* CONFIG_VIRT_CPU_ACCOUNTING */ +#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static int dtl_start(struct dtl *dtl) { @@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl) { return lppaca_of(dtl->cpu).dtl_idx; } -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static int dtl_enable(struct dtl *dtl) { diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ca55882465d6..527e12c9573b 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = { struct kmem_cache *dtl_cache; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Allocate space for the dispatch trace log for all possible cpus * and register the buffers with the hypervisor. This is used for @@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void) return 0; } -#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static inline int alloc_dispatch_logs(void) { return 0; } -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static int alloc_dispatch_log_kmem_cache(void) { diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index c6eddf50eaf9..51969436b8b8 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -4,6 +4,12 @@ #include #include -#include +#ifndef CONFIG_VIRT_CPU_ACCOUNTING +# include +#endif + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +# include +#endif #endif diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index c73d182f4751..b6485cafb7bd 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -26,6 +26,7 @@ typedef u64 __nocast cputime64_t; */ #define cputime_to_jiffies(__ct) \ ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) +#define cputime_to_scaled(__ct) (__ct) #define jiffies_to_cputime(__jif) \ (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) #define cputime64_to_jiffies64(__ct) \ @@ -33,6 +34,13 @@ typedef u64 __nocast cputime64_t; #define jiffies64_to_cputime64(__jif) \ (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) + +/* + * Convert cputime <-> nanoseconds + */ +#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs)) + + /* * Convert cputime <-> microseconds */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 66b70780e910..ed5f6ed6eb77 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t) extern void account_steal_time(cputime_t); extern void account_idle_time(cputime_t); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) { vtime_account_user(tsk); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index ae30ab58431a..21ef703d1b25 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -14,9 +14,25 @@ extern void vtime_account(struct task_struct *tsk); static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } +static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_account(struct task_struct *tsk) { } #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +static inline void arch_vtime_task_switch(struct task_struct *tsk) { } +static inline void vtime_user_enter(struct task_struct *tsk) +{ + vtime_account_system(tsk); +} +static inline void vtime_user_exit(struct task_struct *tsk) +{ + vtime_account_user(tsk); +} +#else +static inline void vtime_user_enter(struct task_struct *tsk) { } +static inline void vtime_user_exit(struct task_struct *tsk) { } +#endif + #ifdef CONFIG_IRQ_TIME_ACCOUNTING extern void irqtime_account_irq(struct task_struct *tsk); #else diff --git a/init/Kconfig b/init/Kconfig index be8b7f55312d..a05f843e7e52 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -326,6 +326,9 @@ source "kernel/time/Kconfig" menu "CPU/Task time and stats accounting" +config VIRT_CPU_ACCOUNTING + bool + choice prompt "Cputime accounting" default TICK_CPU_ACCOUNTING if !PPC64 @@ -342,9 +345,10 @@ config TICK_CPU_ACCOUNTING If unsure, say Y. -config VIRT_CPU_ACCOUNTING +config VIRT_CPU_ACCOUNTING_NATIVE bool "Deterministic task and CPU time accounting" depends on HAVE_VIRT_CPU_ACCOUNTING + select VIRT_CPU_ACCOUNTING help Select this option to enable more accurate task and CPU time accounting. This is done by reading a CPU counter on each @@ -354,6 +358,23 @@ config VIRT_CPU_ACCOUNTING this also enables accounting of stolen time on logically-partitioned systems. +config VIRT_CPU_ACCOUNTING_GEN + bool "Full dynticks CPU time accounting" + depends on HAVE_CONTEXT_TRACKING && 64BIT + select VIRT_CPU_ACCOUNTING + select CONTEXT_TRACKING + help + Select this option to enable task and CPU time accounting on full + dynticks systems. This accounting is implemented by watching every + kernel-user boundaries using the context tracking subsystem. + The accounting is thus performed at the expense of some significant + overhead. + + For now this is only useful if you are working on the full + dynticks subsystem development. + + If unsure, say N. + config IRQ_TIME_ACCOUNTING bool "Fine granularity task level IRQ time accounting" depends on HAVE_IRQ_TIME_ACCOUNTING diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 54f471e536dc..9002e92e6372 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -30,8 +30,9 @@ void user_enter(void) local_irq_save(flags); if (__this_cpu_read(context_tracking.active) && __this_cpu_read(context_tracking.state) != IN_USER) { - __this_cpu_write(context_tracking.state, IN_USER); + vtime_user_enter(current); rcu_user_enter(); + __this_cpu_write(context_tracking.state, IN_USER); } local_irq_restore(flags); } @@ -53,8 +54,9 @@ void user_exit(void) local_irq_save(flags); if (__this_cpu_read(context_tracking.state) == IN_USER) { - __this_cpu_write(context_tracking.state, IN_KERNEL); rcu_user_exit(); + vtime_user_exit(current); + __this_cpu_write(context_tracking.state, IN_KERNEL); } local_irq_restore(flags); } diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5849448b981e..1c964eced92c 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "sched.h" @@ -479,7 +480,9 @@ void vtime_task_switch(struct task_struct *prev) else vtime_account_system(prev); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE vtime_account_user(prev); +#endif arch_vtime_task_switch(prev); } #endif @@ -495,10 +498,24 @@ void vtime_task_switch(struct task_struct *prev) #ifndef __ARCH_HAS_VTIME_ACCOUNT void vtime_account(struct task_struct *tsk) { - if (in_interrupt() || !is_idle_task(tsk)) - vtime_account_system(tsk); - else - vtime_account_idle(tsk); + if (!in_interrupt()) { + /* + * If we interrupted user, context_tracking_in_user() + * is 1 because the context tracking don't hook + * on irq entry/exit. This way we know if + * we need to flush user time on kernel entry. + */ + if (context_tracking_in_user()) { + vtime_account_user(tsk); + return; + } + + if (is_idle_task(tsk)) { + vtime_account_idle(tsk); + return; + } + } + vtime_account_system(tsk); } EXPORT_SYMBOL_GPL(vtime_account); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ @@ -583,3 +600,39 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); } #endif + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +static DEFINE_PER_CPU(unsigned long long, cputime_snap); + +static cputime_t get_vtime_delta(void) +{ + unsigned long long delta; + + delta = sched_clock() - __this_cpu_read(cputime_snap); + __this_cpu_add(cputime_snap, delta); + + /* CHECKME: always safe to convert nsecs to cputime? */ + return nsecs_to_cputime(delta); +} + +void vtime_account_system(struct task_struct *tsk) +{ + cputime_t delta_cpu = get_vtime_delta(); + + account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); +} + +void vtime_account_user(struct task_struct *tsk) +{ + cputime_t delta_cpu = get_vtime_delta(); + + account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); +} + +void vtime_account_idle(struct task_struct *tsk) +{ + cputime_t delta_cpu = get_vtime_delta(); + + account_idle_time(delta_cpu); +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ -- cgit v1.2.3 From 3f4724ea85b7d9055a9976fa8f30b471bdfbca93 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Jul 2012 18:00:34 +0200 Subject: cputime: Allow dynamic switch between tick/virtual based cputime accounting Allow to dynamically switch between tick and virtual based cputime accounting. This way we can provide a kind of "on-demand" virtual based cputime accounting. In this mode, the kernel relies on the context tracking subsystem to dynamically probe on kernel boundaries. This is in preparation for being able to stop the timer tick in more places than just the idle state. Doing so will depend on CONFIG_VIRT_CPU_ACCOUNTING_GEN which makes it possible to account the cputime without the tick by hooking on kernel/user boundaries. Depending whether the tick is stopped or not, we can switch between tick and vtime based accounting anytime in order to minimize the overhead associated to user hooks. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/vtime.h | 8 ++++++++ kernel/sched/cputime.c | 41 +++++++++++++++++++++++++++++++---------- kernel/time/tick-sched.c | 5 ++++- 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 21ef703d1b25..5368af9bdf06 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -10,12 +10,20 @@ extern void vtime_account_system_irqsafe(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); extern void vtime_account(struct task_struct *tsk); + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern bool vtime_accounting_enabled(void); #else +static inline bool vtime_accounting_enabled(void) { return true; } +#endif + +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_account(struct task_struct *tsk) { } +static inline bool vtime_accounting_enabled(void) { return false; } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 1c964eced92c..e1939d38bf73 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -317,8 +317,6 @@ out: rcu_read_unlock(); } -#ifndef CONFIG_VIRT_CPU_ACCOUNTING - #ifdef CONFIG_IRQ_TIME_ACCOUNTING /* * Account a tick to a process and cpustat @@ -383,11 +381,12 @@ static void irqtime_account_idle_ticks(int ticks) irqtime_account_process_tick(current, 0, rq); } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ -static void irqtime_account_idle_ticks(int ticks) {} -static void irqtime_account_process_tick(struct task_struct *p, int user_tick, +static inline void irqtime_account_idle_ticks(int ticks) {} +static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, struct rq *rq) {} #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Account a single tick of cpu time. * @p: the process that the cpu time gets accounted to @@ -398,6 +397,9 @@ void account_process_tick(struct task_struct *p, int user_tick) cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); struct rq *rq = this_rq(); + if (vtime_accounting_enabled()) + return; + if (sched_clock_irqtime) { irqtime_account_process_tick(p, user_tick, rq); return; @@ -439,8 +441,7 @@ void account_idle_ticks(unsigned long ticks) account_idle_time(jiffies_to_cputime(ticks)); } - -#endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ /* * Use precise platform statistics if available: @@ -475,6 +476,9 @@ EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe); #ifndef __ARCH_HAS_VTIME_TASK_SWITCH void vtime_task_switch(struct task_struct *prev) { + if (!vtime_accounting_enabled()) + return; + if (is_idle_task(prev)) vtime_account_idle(prev); else @@ -498,6 +502,9 @@ void vtime_task_switch(struct task_struct *prev) #ifndef __ARCH_HAS_VTIME_ACCOUNT void vtime_account(struct task_struct *tsk) { + if (!vtime_accounting_enabled()) + return; + if (!in_interrupt()) { /* * If we interrupted user, context_tracking_in_user() @@ -520,7 +527,7 @@ void vtime_account(struct task_struct *tsk) EXPORT_SYMBOL_GPL(vtime_account); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ -#else +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) { @@ -599,7 +606,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime thread_group_cputime(p, &cputime); cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); } -#endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN static DEFINE_PER_CPU(unsigned long long, cputime_snap); @@ -617,14 +624,23 @@ static cputime_t get_vtime_delta(void) void vtime_account_system(struct task_struct *tsk) { - cputime_t delta_cpu = get_vtime_delta(); + cputime_t delta_cpu; + + if (!vtime_accounting_enabled()) + return; + delta_cpu = get_vtime_delta(); account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); } void vtime_account_user(struct task_struct *tsk) { - cputime_t delta_cpu = get_vtime_delta(); + cputime_t delta_cpu; + + if (!vtime_accounting