aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2012-05-14 14:43:40 +0200
committerIngo Molnar <mingo@kernel.org>2012-05-14 14:43:40 +0200
commit9cba26e66d09bf394ae5a739627a1dc8b7cae6f4 (patch)
treef03743d576a0c7826b9921ad47e70370ebe80a22 /kernel
parentec83db0f78cd44c3b586ec1c3a348d1a8a389797 (diff)
parent73eff9f56e15598c8399c0b86899fd889b97f085 (diff)
Merge branch 'perf/uprobes' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/uprobes
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/events/core.c25
-rw-r--r--kernel/irq/Kconfig2
-rw-r--r--kernel/irq/irqdomain.c47
-rw-r--r--kernel/irq_work.c1
-rw-r--r--kernel/itimer.c8
-rw-r--r--kernel/panic.c2
-rw-r--r--kernel/sched/core.c9
-rw-r--r--kernel/time/Kconfig4
-rw-r--r--kernel/time/tick-broadcast.c4
-rw-r--r--kernel/time/tick-sched.c4
-rw-r--r--kernel/trace/ftrace.c44
-rw-r--r--kernel/trace/ring_buffer.c248
-rw-r--r--kernel/trace/trace.c453
-rw-r--r--kernel/trace/trace.h4
-rw-r--r--kernel/trace/trace_printk.c4
16 files changed, 506 insertions, 355 deletions
diff --git a/kernel/cred.c b/kernel/cred.c
index 97b36eeca4c9..e70683d9ec32 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -386,6 +386,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
struct cred *new;
int ret;
+ p->replacement_session_keyring = NULL;
+
if (
#ifdef CONFIG_KEYS
!p->cred->thread_keyring &&
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a6a9ec4cd8f5..e82c7a1face9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2039,8 +2039,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
* accessing the event control register. If a NMI hits, then it will
* not restart the event.
*/
-void __perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next)
+static void __perf_event_task_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
int ctxn;
@@ -2279,8 +2279,8 @@ static void perf_branch_stack_sched_in(struct task_struct *prev,
* accessing the event control register. If a NMI hits, then it will
* keep the event running.
*/
-void __perf_event_task_sched_in(struct task_struct *prev,
- struct task_struct *task)
+static void __perf_event_task_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
struct perf_event_context *ctx;
int ctxn;
@@ -2305,6 +2305,12 @@ void __perf_event_task_sched_in(struct task_struct *prev,
perf_branch_stack_sched_in(prev, task);
}
+void __perf_event_task_sched(struct task_struct *prev, struct task_struct *next)
+{
+ __perf_event_task_sched_out(prev, next);
+ __perf_event_task_sched_in(prev, next);
+}
+
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
{
u64 frequency = event->attr.sample_freq;
@@ -4957,7 +4963,7 @@ void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
if (rctx < 0)
return;
- perf_sample_data_init(&data, addr);
+ perf_sample_data_init(&data, addr, 0);
do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
@@ -5215,7 +5221,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
.data = record,
};
- perf_sample_data_init(&data, addr);
+ perf_sample_data_init(&data, addr, 0);
data.raw = &raw;
hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
@@ -5318,7 +5324,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;
- perf_sample_data_init(&sample, bp->attr.bp_addr);
+ perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
if (!bp->hw.state && !perf_exclude_event(bp, regs))
perf_swevent_event(bp, 1, &sample, regs);
@@ -5344,13 +5350,12 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
event->pmu->read(event);
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
regs = get_irq_regs();
if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && is_idle_task(current)))
- if (perf_event_overflow(event, &data, regs))
+ if (__perf_event_overflow(event, 1, &data, regs))
ret = HRTIMER_NORESTART;
}
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index cf1a4a68ce44..d1a758bc972a 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -62,7 +62,7 @@ config IRQ_DOMAIN_DEBUG
help
This option will show the mapping relationship between hardware irq
numbers and Linux irq numbers. The mapping is exposed via debugfs
- in the file "virq_mapping".
+ in the file "irq_domain_mapping".
If you don't know what this means you don't need it.
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 3601f3fbf67c..0e0ba5f840b2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -23,7 +23,6 @@ static LIST_HEAD(irq_domain_list);
static DEFINE_MUTEX(irq_domain_mutex);
static DEFINE_MUTEX(revmap_trees_mutex);
-static unsigned int irq_virq_count = NR_IRQS;
static struct irq_domain *irq_default_domain;
/**
@@ -184,13 +183,16 @@ struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
}
struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
+ unsigned int max_irq,
const struct irq_domain_ops *ops,
void *host_data)
{
struct irq_domain *domain = irq_domain_alloc(of_node,
IRQ_DOMAIN_MAP_NOMAP, ops, host_data);
- if (domain)
+ if (domain) {
+ domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0;
irq_domain_add(domain);
+ }
return domain;
}
@@ -262,22 +264,6 @@ void irq_set_default_host(struct irq_domain *domain)
irq_default_domain = domain;
}
-/**
- * irq_set_virq_count() - Set the maximum number of linux irqs
- * @count: number of linux irqs, capped with NR_IRQS
- *
- * This is mainly for use by platforms like iSeries who want to program
- * the virtual irq number in the controller to avoid the reverse mapping
- */
-void irq_set_virq_count(unsigned int count)
-{
- pr_debug("irq: Trying to set virq count to %d\n", count);
-
- BUG_ON(count < NUM_ISA_INTERRUPTS);
- if (count < NR_IRQS)
- irq_virq_count = count;
-}
-
static int irq_setup_virq(struct irq_domain *domain, unsigned int virq,
irq_hw_number_t hwirq)
{
@@ -320,13 +306,12 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
pr_debug("irq: create_direct virq allocation failed\n");
return 0;
}
- if (virq >= irq_virq_count) {
+ if (virq >= domain->revmap_data.nomap.max_irq) {
pr_err("ERROR: no free irqs available below %i maximum\n",
- irq_virq_count);
+ domain->revmap_data.nomap.max_irq);
irq_free_desc(virq);
return 0;
}
-
pr_debug("irq: create_direct obtained virq %d\n", virq);
if (irq_setup_virq(domain, virq, virq)) {
@@ -350,7 +335,8 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
unsigned int irq_create_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq)
{
- unsigned int virq, hint;
+ unsigned int hint;
+ int virq;
pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
@@ -377,13 +363,13 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
return irq_domain_legacy_revmap(domain, hwirq);
/* Allocate a virtual interrupt number */
- hint = hwirq % irq_virq_count;
+ hint = hwirq % nr_irqs;
if (hint == 0)
hint++;
virq = irq_alloc_desc_from(hint, 0);
- if (!virq)
+ if (virq <= 0)
virq = irq_alloc_desc_from(1, 0);
- if (!virq) {
+ if (virq <= 0) {
pr_debug("irq: -> virq allocation failed\n");
return 0;
}
@@ -515,7 +501,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq)
{
unsigned int i;
- unsigned int hint = hwirq % irq_virq_count;
+ unsigned int hint = hwirq % nr_irqs;
/* Look for default domain if nececssary */
if (domain == NULL)
@@ -536,7 +522,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
if (data && (data->domain == domain) && (data->hwirq == hwirq))
return i;
i++;
- if (i >= irq_virq_count)
+ if (i >= nr_irqs)
i = 1;
} while(i != hint);
return 0;
@@ -642,8 +628,9 @@ static int virq_debug_show(struct seq_file *m, void *private)
void *data;
int i;
- seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq",
- "chip name", "chip data", "domain name");
+ seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq",
+ "chip name", (int)(2 * sizeof(void *) + 2), "chip data",
+ "domain name");
for (i = 1; i < nr_irqs; i++) {
desc = irq_to_desc(i);
@@ -666,7 +653,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
seq_printf(m, "%-15s ", p);
data = irq_desc_get_chip_data(desc);
- seq_printf(m, "0x%16p ", data);
+ seq_printf(m, data ? "0x%p " : " %p ", data);
if (desc->irq_data.domain && desc->irq_data.domain->of_node)
p = desc->irq_data.domain->of_node->full_name;
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 0c56d44b9fd5..1588e3b2871b 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -11,6 +11,7 @@
#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
+#include <linux/irqflags.h>
#include <asm/processor.h>
/*
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 22000c3db0dd..8d262b467573 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -284,8 +284,12 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
if (value) {
if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
return -EFAULT;
- } else
- memset((char *) &set_buffer, 0, sizeof(set_buffer));
+ } else {
+ memset(&set_buffer, 0, sizeof(set_buffer));
+ printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer."
+ " Misfeature support will be removed\n",
+ current->comm);
+ }
error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
if (error || !ovalue)
diff --git a/kernel/panic.c b/kernel/panic.c
index 80aed44e345a..8ed89a175d79 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -97,7 +97,7 @@ void panic(const char *fmt, ...)
/*
* Avoid nested stack-dumping if a panic occurs during oops processing
*/
- if (!oops_in_progress)
+ if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
dump_stack();
#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4603b9d8f30a..5c692a0a555d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1913,7 +1913,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
sched_info_switch(prev, next);
- perf_event_task_sched_out(prev, next);
+ perf_event_task_sched(prev, next);
fire_sched_out_preempt_notifiers(prev, next);
prepare_lock_switch(rq, next);
prepare_arch_switch(next);
@@ -1956,13 +1956,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
*/
prev_state = prev->state;
finish_arch_switch(prev);
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- local_irq_disable();
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
- perf_event_task_sched_in(prev, current);
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- local_irq_enable();
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
finish_arch_post_lock_switch();
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 2cf9cc7aa103..a20dc8a3c949 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -1,6 +1,10 @@
#
# Timer subsystem related configuration options
#
+
+# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is
+# only related to the tick functionality. Oneshot clockevent devices
+# are supported independ of this.
config TICK_ONESHOT
bool
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index e883f57a3cd3..bf57abdc7bd0 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -575,10 +575,12 @@ void tick_broadcast_switch_to_oneshot(void)
unsigned long flags;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+ tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
+
if (cpumask_empty(tick_get_broadcast_mask()))
goto end;
- tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
bc = tick_broadcast_device.evtdev;
if (bc)
tick_broadcast_setup_oneshot(bc);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3526038f2836..6a3a5b9ff561 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -534,9 +534,9 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
hrtimer_get_expires(&ts->sched_timer), 0))
break;
}
- /* Update jiffies and reread time */
- tick_do_update_jiffies64(now);
+ /* Reread time and update jiffies */
now = ktime_get();
+ tick_do_update_jiffies64(now);
}
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0fa92f677c92..cf81f27ce6c6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2469,57 +2469,35 @@ static int
ftrace_avail_open(struct inode *inode, struct file *file)
{
struct ftrace_iterator *iter;
- int ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
- iter = kzalloc(sizeof(*iter), GFP_KERNEL);
- if (!iter)
- return -ENOMEM;
-
- iter->pg = ftrace_pages_start;
- iter->ops = &global_ops;
-
- ret = seq_open(file, &show_ftrace_seq_ops);
- if (!ret) {
- struct seq_file *m = file->private_data;
-
- m->private = iter;
- } else {
- kfree(iter);
+ iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
+ if (iter) {
+ iter->pg = ftrace_pages_start;
+ iter->ops = &global_ops;
}
- return ret;
+ return iter ? 0 : -ENOMEM;
}
static int
ftrace_enabled_open(struct inode *inode, struct file *file)
{
struct ftrace_iterator *iter;
- int ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
- iter = kzalloc(sizeof(*iter), GFP_KERNEL);
- if (!iter)
- return -ENOMEM;
-
- iter->pg = ftrace_pages_start;
- iter->flags = FTRACE_ITER_ENABLED;
- iter->ops = &global_ops;
-
- ret = seq_open(file, &show_ftrace_seq_ops);
- if (!ret) {
- struct seq_file *m = file->private_data;
-
- m->private = iter;
- } else {
- kfree(iter);
+ iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
+ if (iter) {
+ iter->pg = ftrace_pages_start;
+ iter->flags = FTRACE_ITER_ENABLED;
+ iter->ops = &global_ops;
}
- return ret;
+ return iter ? 0 : -ENOMEM;
}
static void ftrace_filter_reset(struct ftrace_hash *hash)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cf8d11e91efd..2d5eb3320827 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -449,6 +449,7 @@ struct ring_buffer_per_cpu {
raw_spinlock_t reader_lock; /* serialize readers */
arch_spinlock_t lock;
struct lock_class_key lock_key;
+ unsigned int nr_pages;
struct list_head *pages;
struct buffer_page *head_page; /* read from head */
struct buffer_page *tail_page; /* write to tail */
@@ -466,10 +467,12 @@ struct ring_buffer_per_cpu {
unsigned long read_bytes;
u64 write_stamp;
u64 read_stamp;
+ /* ring buffer pages to update, > 0 to add, < 0 to remove */
+ int nr_pages_to_update;
+ struct list_head new_pages; /* new pages to add */
};
struct ring_buffer {
- unsigned pages;
unsigned flags;
int cpus;
atomic_t record_disabled;
@@ -963,14 +966,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
return 0;
}
-static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
- unsigned nr_pages)
+static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
{
+ int i;
struct buffer_page *bpage, *tmp;
- LIST_HEAD(pages);
- unsigned i;
-
- WARN_ON(!nr_pages);
for (i = 0; i < nr_pages; i++) {
struct page *page;
@@ -981,15 +980,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
*/
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL | __GFP_NORETRY,
- cpu_to_node(cpu_buffer->cpu));
+ cpu_to_node(cpu));
if (!bpage)
goto free_pages;
- rb_check_bpage(cpu_buffer, bpage);
+ list_add(&bpage->list, pages);
- list_add(&bpage->list, &pages);
-
- page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
+ page = alloc_pages_node(cpu_to_node(cpu),
GFP_KERNEL | __GFP_NORETRY, 0);
if (!page)
goto free_pages;
@@ -997,6 +994,27 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
rb_init_page(bpage->page);
}
+ return 0;
+
+free_pages:
+ list_for_each_entry_safe(bpage, tmp, pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+
+ return -ENOMEM;
+}
+
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned nr_pages)
+{
+ LIST_HEAD(pages);
+
+ WARN_ON(!nr_pages);
+
+ if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
+ return -ENOMEM;
+
/*
* The ring buffer page list is a circular list that does not
* start and end with a list head. All page list items point to
@@ -1005,20 +1023,15 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
cpu_buffer->pages = pages.next;
list_del(&pages);
+ cpu_buffer->nr_pages = nr_pages;
+
rb_check_pages(cpu_buffer);
return 0;
-
- free_pages:
- list_for_each_entry_safe(bpage, tmp, &pages, list) {
- list_del_init(&bpage->list);
- free_buffer_page(bpage);
- }
- return -ENOMEM;
}
static struct ring_buffer_per_cpu *
-rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct buffer_page *bpage;
@@ -1052,7 +1065,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
- ret = rb_allocate_pages(cpu_buffer, buffer->pages);
+ ret = rb_allocate_pages(cpu_buffer, nr_pages);
if (ret < 0)
goto fail_free_reader;
@@ -1113,7 +1126,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
{
struct ring_buffer *buffer;
int bsize;
- int cpu;
+ int cpu, nr_pages;
/* keep it in its own cache line */
buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
@@ -1124,14 +1137,14 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
goto fail_free_buffer;
- buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+ nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
buffer->flags = flags;
buffer->clock = trace_clock_local;
buffer->reader_lock_key = key;
/* need at least two pages */
- if (buffer->pages < 2)
- buffer->pages = 2;
+ if (nr_pages < 2)
+ nr_pages = 2;
/*
* In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1154,7 +1167,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
for_each_buffer_cpu(buffer, cpu) {
buffer->buffers[cpu] =
- rb_allocate_cpu_buffer(buffer, cpu);
+ rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
if (!buffer->buffers[cpu])
goto fail_free_buffers;
}
@@ -1276,6 +1289,18 @@ out:
raw_spin_unlock_irq(&cpu_buffer->reader_lock);
}
+static void update_pages_handler(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ if (cpu_buffer->nr_pages_to_update > 0)
+ rb_insert_pages(cpu_buffer, &cpu_buffer->new_pages,
+ cpu_buffer->nr_pages_to_update);
+ else
+ rb_remove_pages(cpu_buffer, -cpu_buffer->nr_pages_to_update);
+ cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
+ /* reset this value */
+ cpu_buffer->nr_pages_to_update = 0;
+}
+
/**
* ring_buffer_resize - resize the ring buffer
* @buffer: the buffer to resize.
@@ -1285,14 +1310,12 @@ out:
*
* Returns -1 on failure.
*/
-int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
+ int cpu_id)
{
struct ring_buffer_per_cpu *cpu_buffer;
- unsigned nr_pages, rm_pages, new_pages;
- struct buffer_page *bpage, *tmp;
- unsigned long buffer_size;
- LIST_HEAD(pages);
- int i, cpu;
+ unsigned nr_pages;
+ int cpu;
/*
* Always succeed at resizing a non-existent buffer:
@@ -1302,15 +1325,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
size *= BUF_PAGE_SIZE;
- buffer_size = buffer->pages * BUF_PAGE_SIZE;
/* we need a minimum of two pages */
if (size < BUF_PAGE_SIZE * 2)
size = BUF_PAGE_SIZE * 2;
- if (size == buffer_size)
- return size;
-
atomic_inc(&buffer->record_disabled);
/* Make sure all writers are done with this buffer. */
@@ -1321,68 +1340,56 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
- if (size < buffer_size) {
-
- /* easy case, just free pages */
- if (RB_WARN_ON(buffer, nr_pages >= buffer->pages))
- goto out_fail;
-
- rm_pages = buffer->pages - nr_pages;
-
+ if (cpu_id == RING_BUFFER_ALL_CPUS) {
+ /* calculate the pages to update */
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
- rb_remove_pages(cpu_buffer, rm_pages);
- }
- goto out;
- }
- /*
- * This is a bit more difficult. We only want to add pages
- * when we can allocate enough for all CPUs. We do this
- * by allocating all the pages and storing them on a local
- * link list. If we succeed in our allocation, then we
- * add these pages to the cpu_buffers. Otherwise we just free
- * them all and return -ENOMEM;
- */
- if (RB_WARN_ON(buffer, nr_pages <= buffer->pages))
- goto out_fail;
+ cpu_buffer->nr_pages_to_update = nr_pages -
+ cpu_buffer->nr_pages;
- new_pages = nr_pages - buffer->pages;
+ /*
+ * nothing more to do for removing pages or no update
+ */
+ if (cpu_buffer->nr_pages_to_update <= 0)
+ continue;
- for_each_buffer_cpu(buffer, cpu) {
- for (i = 0; i < new_pages; i++) {
- struct page *page;
/*
- * __GFP_NORETRY flag makes sure that the allocation
- * fails gracefully without invoking oom-killer and
- * the system is not destabilized.
+ * to add pages, make sure all new pages can be
+ * allocated without receiving ENOMEM
*/
- bpage = kzalloc_node(ALIGN(sizeof(*bpage),
- cache_line_size()),
- GFP_KERNEL | __GFP_NORETRY,
- cpu_to_node(cpu));
- if (!bpage)
- goto free_pages;
- list_add(&bpage->list, &pages);
- page = alloc_pages_node(cpu_to_node(cpu),
- GFP_KERNEL | __GFP_NORETRY, 0);
- if (!page)
- goto free_pages;
- bpage->page = page_address(page);
- rb_init_page(bpage->page);
+ INIT_LIST_HEAD(&cpu_buffer->new_pages);
+ if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
+ &cpu_buffer->new_pages, cpu))
+ /* not enough memory for new pages */
+ goto no_mem;
}
- }
- for_each_buffer_cpu(buffer, cpu) {
- cpu_buffer = buffer->buffers[cpu];
- rb_insert_pages(cpu_buffer, &pages, new_pages);
- }
+ /* wait for all the updates to complete */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ if (cpu_buffer->nr_pages_to_update) {
+ update_pages_handler(cpu_buffer);
+ }
+ }
+ } else {
+ cpu_buffer = buffer->buffers[cpu_id];
+ if (nr_pages == cpu_buffer->nr_pages)
+ goto out;
- if (RB_WARN_ON(buffer, !list_empty(&pages)))
- goto out_fail;
+ cpu_buffer->nr_pages_to_update = nr_pages -
+ cpu_buffer->nr_pages;
+
+ INIT_LIST_HEAD(&cpu_buffer->new_pages);
+ if (cpu_buffer->nr_pages_to_update > 0 &&
+ __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
+ &cpu_buffer->new_pages, cpu_id))
+ goto no_mem;
+
+ update_pages_handler(cpu_buffer);
+ }
out:
- buffer->pages = nr_pages;
put_online_cpus();
mutex_unlock(&buffer->mutex);
@@ -1390,25 +1397,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
return size;
- free_pages:
- list_for_each_entry_safe(bpage, tmp, &pages, list) {
- list_del_init(&bpage->list);
- free_buffer_page(bpage);
+ no_mem:
+ for_each_buffer_cpu(buffer, cpu) {
+ struct buffer_page *bpage, *tmp;
+ cpu_buffer = buffer->buffers[cpu];
+ /* reset this number regardless */
+ cpu_buffer->nr_pages_to_update = 0;
+ if (list_empty(&cpu_buffer->new_pages))
+ continue;
+ list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
+ list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
}
put_online_cpus();
mutex_unlock(&buffer->mutex);
atomic_dec(&buffer->record_disabled);
return -ENOMEM;
-
- /*
- * Something went totally wrong, and we are too paranoid
- * to even clean up the mess.
- */
- out_fail:
- put_online_cpus();
- mutex_unlock(&buffer->mutex);
- atomic_dec(&buffer->record_disabled);
- return -1;
}
EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -1510,7 +1516,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
* assign the commit to the tail.
*/
again:
- max_count = cpu_buffer->buffer->pages * 100;
+ max_count = cpu_buffer->nr_pages * 100;
while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
if (RB_WARN_ON(cpu_buffer, !(--max_count)))
@@ -3588,9 +3594,18 @@ EXPORT_SYMBOL_GPL(ring_buffer_read);
* ring_buffer_size - return the size of the ring buffer (in bytes)
* @buffer: The ring buffer.
*/
-unsigned long ring_buffer_size(struct ring_buffer *buffer)
+unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
{
- return BUF_PAGE_SIZE * buffer->pages;
+ /*
+ * Earlier, this method returned
+ * BUF_PAGE_SIZE * buffer->nr_pages
+ * Since the nr_pages field is now removed, we have converted this to
+ * return the per cpu buffer value.
+ */
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return 0;
+
+ return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
}
EXPORT_SYMBOL_GPL(ring_buffer_size);
@@ -3765,8 +3780,11 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
!cpumask_test_cpu(cpu, buffer_b->cpumask))
goto out;
+ cpu_buffer_a = buffer_a->buffers[cpu];
+ cpu_buffer_b = buffer_b->buffers[cpu];
+
/* At least make sure the two buffers are somewhat the same */
- if (buffer_a->pages != buffer_b->pages)
+ if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
goto out;
ret = -EAGAIN;
@@ -3780,9 +3798,6 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
if (atomic_read(&buffer_b->record_disabled))
goto out;
- cpu_buffer_a = buffer_a->buffers[cpu];
- cpu_buffer_b = buffer_b->buffers[cpu];
-
if (atomic_read(&cpu_buffer_a->record_disabled))
goto out;
@@ -4071,6 +4086,8 @@ static int rb_cpu_notify(struct notifier_block *self,
struct ring_buffer *buffer =
container_of(self, struct ring_buffer, cpu_notify);
long cpu = (long)hcpu;
+ int cpu_i, nr_pages_same;
+ unsigned int nr_pages;
switch (action) {
case CPU_UP_PREPARE:
@@ -4078,8 +4095,23 @@ static int rb_cpu_notify(struct notifier_block *self,
if (cpumask_test_cpu(cpu, buffer->cpumask))
return NOTIFY_OK;
+ nr_pages = 0;
+ nr_pages_same = 1;
+ /* check if all cpu sizes are same */
+ for_each_buffer_cpu(buffer, cpu_i) {
+ /* fill in the size from first enabled cpu */
+ if (nr_pages == 0)
+ nr_pages = buffer->buffers[cpu_i]->nr_pages;
+ if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
+ nr_pages_same = 0;
+ break;
+ }
+ }
+ /* allocate minimum pages, user can later expand it */
+ if (!nr_pages_same)
+ nr_pages = 2;
buffer->buffers[cpu] =
- rb_allocate_cpu_buffer(buffer, cpu);
+ rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
if (!buffer->buffers[cpu]) {
WARN(1, "failed to allocate ring buffer on CPU %ld\n",
cpu);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ed7b5d1e12f4..48ef4960ec90 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -87,18 +87,6 @@ static int tracing_disabled = 1;
DEFINE_PER_CPU(int, ftrace_cpu_disabled);
-static inline void ftrace_disable_cpu(void)
-{
- preempt_disable();
- __this_cpu_inc(ftrace_cpu_disabled);
-}
-
-static inline void ftrace_enable_cpu(void)
-{
- __this_cpu_dec(ftrace_cpu_disabled);
- preempt_enable();
-}
-
cpumask_var_t __read_mostly tracing_buffer_mask;
/*
@@ -629,7 +617,6 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
int len;
- void *ret;
if (s->len <= s->readpos)
return -EBUSY;
@@ -637,9 +624,7 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
len = s->len - s->readpos;
if (cnt > len)
cnt = len;
- ret = memcpy(buf, s->buffer + s->readpos, cnt);
- if (!ret)
- return -EFAULT;
+ memcpy(buf, s->buffer + s->readpos, cnt);
s->readpos += cnt;
return cnt;
@@ -751,8 +736,6 @@ update_m