diff options
| author | Ingo Molnar <mingo@kernel.org> | 2012-05-14 14:43:40 +0200 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2012-05-14 14:43:40 +0200 |
| commit | 9cba26e66d09bf394ae5a739627a1dc8b7cae6f4 (patch) | |
| tree | f03743d576a0c7826b9921ad47e70370ebe80a22 /kernel | |
| parent | ec83db0f78cd44c3b586ec1c3a348d1a8a389797 (diff) | |
| parent | 73eff9f56e15598c8399c0b86899fd889b97f085 (diff) | |
Merge branch 'perf/uprobes' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/uprobes
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cred.c | 2 | ||||
| -rw-r--r-- | kernel/events/core.c | 25 | ||||
| -rw-r--r-- | kernel/irq/Kconfig | 2 | ||||
| -rw-r--r-- | kernel/irq/irqdomain.c | 47 | ||||
| -rw-r--r-- | kernel/irq_work.c | 1 | ||||
| -rw-r--r-- | kernel/itimer.c | 8 | ||||
| -rw-r--r-- | kernel/panic.c | 2 | ||||
| -rw-r--r-- | kernel/sched/core.c | 9 | ||||
| -rw-r--r-- | kernel/time/Kconfig | 4 | ||||
| -rw-r--r-- | kernel/time/tick-broadcast.c | 4 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 4 | ||||
| -rw-r--r-- | kernel/trace/ftrace.c | 44 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer.c | 248 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 453 | ||||
| -rw-r--r-- | kernel/trace/trace.h | 4 | ||||
| -rw-r--r-- | kernel/trace/trace_printk.c | 4 |
16 files changed, 506 insertions, 355 deletions
diff --git a/kernel/cred.c b/kernel/cred.c index 97b36eeca4c9..e70683d9ec32 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -386,6 +386,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; + p->replacement_session_keyring = NULL; + if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && diff --git a/kernel/events/core.c b/kernel/events/core.c index a6a9ec4cd8f5..e82c7a1face9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2039,8 +2039,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, * accessing the event control register. If a NMI hits, then it will * not restart the event. */ -void __perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next) +static void __perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next) { int ctxn; @@ -2279,8 +2279,8 @@ static void perf_branch_stack_sched_in(struct task_struct *prev, * accessing the event control register. If a NMI hits, then it will * keep the event running. */ -void __perf_event_task_sched_in(struct task_struct *prev, - struct task_struct *task) +static void __perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task) { struct perf_event_context *ctx; int ctxn; @@ -2305,6 +2305,12 @@ void __perf_event_task_sched_in(struct task_struct *prev, perf_branch_stack_sched_in(prev, task); } +void __perf_event_task_sched(struct task_struct *prev, struct task_struct *next) +{ + __perf_event_task_sched_out(prev, next); + __perf_event_task_sched_in(prev, next); +} + static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) { u64 frequency = event->attr.sample_freq; @@ -4957,7 +4963,7 @@ void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) if (rctx < 0) return; - perf_sample_data_init(&data, addr); + perf_sample_data_init(&data, addr, 0); do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); @@ -5215,7 +5221,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, .data = record, }; - perf_sample_data_init(&data, addr); + perf_sample_data_init(&data, addr, 0); data.raw = &raw; hlist_for_each_entry_rcu(event, node, head, hlist_entry) { @@ -5318,7 +5324,7 @@ void perf_bp_event(struct perf_event *bp, void *data) struct perf_sample_data sample; struct pt_regs *regs = data; - perf_sample_data_init(&sample, bp->attr.bp_addr); + perf_sample_data_init(&sample, bp->attr.bp_addr, 0); if (!bp->hw.state && !perf_exclude_event(bp, regs)) perf_swevent_event(bp, 1, &sample, regs); @@ -5344,13 +5350,12 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) event->pmu->read(event); - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, event->hw.last_period); regs = get_irq_regs(); if (regs && !perf_exclude_event(event, regs)) { if (!(event->attr.exclude_idle && is_idle_task(current))) - if (perf_event_overflow(event, &data, regs)) + if (__perf_event_overflow(event, 1, &data, regs)) ret = HRTIMER_NORESTART; } diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index cf1a4a68ce44..d1a758bc972a 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -62,7 +62,7 @@ config IRQ_DOMAIN_DEBUG help This option will show the mapping relationship between hardware irq numbers and Linux irq numbers. The mapping is exposed via debugfs - in the file "virq_mapping". + in the file "irq_domain_mapping". If you don't know what this means you don't need it. diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 3601f3fbf67c..0e0ba5f840b2 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -23,7 +23,6 @@ static LIST_HEAD(irq_domain_list); static DEFINE_MUTEX(irq_domain_mutex); static DEFINE_MUTEX(revmap_trees_mutex); -static unsigned int irq_virq_count = NR_IRQS; static struct irq_domain *irq_default_domain; /** @@ -184,13 +183,16 @@ struct irq_domain *irq_domain_add_linear(struct device_node *of_node, } struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, + unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_NOMAP, ops, host_data); - if (domain) + if (domain) { + domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0; irq_domain_add(domain); + } return domain; } @@ -262,22 +264,6 @@ void irq_set_default_host(struct irq_domain *domain) irq_default_domain = domain; } -/** - * irq_set_virq_count() - Set the maximum number of linux irqs - * @count: number of linux irqs, capped with NR_IRQS - * - * This is mainly for use by platforms like iSeries who want to program - * the virtual irq number in the controller to avoid the reverse mapping - */ -void irq_set_virq_count(unsigned int count) -{ - pr_debug("irq: Trying to set virq count to %d\n", count); - - BUG_ON(count < NUM_ISA_INTERRUPTS); - if (count < NR_IRQS) - irq_virq_count = count; -} - static int irq_setup_virq(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { @@ -320,13 +306,12 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) pr_debug("irq: create_direct virq allocation failed\n"); return 0; } - if (virq >= irq_virq_count) { + if (virq >= domain->revmap_data.nomap.max_irq) { pr_err("ERROR: no free irqs available below %i maximum\n", - irq_virq_count); + domain->revmap_data.nomap.max_irq); irq_free_desc(virq); return 0; } - pr_debug("irq: create_direct obtained virq %d\n", virq); if (irq_setup_virq(domain, virq, virq)) { @@ -350,7 +335,8 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { - unsigned int virq, hint; + unsigned int hint; + int virq; pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); @@ -377,13 +363,13 @@ unsigned int irq_create_mapping(struct irq_domain *domain, return irq_domain_legacy_revmap(domain, hwirq); /* Allocate a virtual interrupt number */ - hint = hwirq % irq_virq_count; + hint = hwirq % nr_irqs; if (hint == 0) hint++; virq = irq_alloc_desc_from(hint, 0); - if (!virq) + if (virq <= 0) virq = irq_alloc_desc_from(1, 0); - if (!virq) { + if (virq <= 0) { pr_debug("irq: -> virq allocation failed\n"); return 0; } @@ -515,7 +501,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { unsigned int i; - unsigned int hint = hwirq % irq_virq_count; + unsigned int hint = hwirq % nr_irqs; /* Look for default domain if nececssary */ if (domain == NULL) @@ -536,7 +522,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, if (data && (data->domain == domain) && (data->hwirq == hwirq)) return i; i++; - if (i >= irq_virq_count) + if (i >= nr_irqs) i = 1; } while(i != hint); return 0; @@ -642,8 +628,9 @@ static int virq_debug_show(struct seq_file *m, void *private) void *data; int i; - seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", - "chip name", "chip data", "domain name"); + seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq", + "chip name", (int)(2 * sizeof(void *) + 2), "chip data", + "domain name"); for (i = 1; i < nr_irqs; i++) { desc = irq_to_desc(i); @@ -666,7 +653,7 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "%-15s ", p); data = irq_desc_get_chip_data(desc); - seq_printf(m, "0x%16p ", data); + seq_printf(m, data ? "0x%p " : " %p ", data); if (desc->irq_data.domain && desc->irq_data.domain->of_node) p = desc->irq_data.domain->of_node->full_name; diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 0c56d44b9fd5..1588e3b2871b 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -11,6 +11,7 @@ #include <linux/irq_work.h> #include <linux/percpu.h> #include <linux/hardirq.h> +#include <linux/irqflags.h> #include <asm/processor.h> /* diff --git a/kernel/itimer.c b/kernel/itimer.c index 22000c3db0dd..8d262b467573 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -284,8 +284,12 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, if (value) { if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) return -EFAULT; - } else - memset((char *) &set_buffer, 0, sizeof(set_buffer)); + } else { + memset(&set_buffer, 0, sizeof(set_buffer)); + printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer." + " Misfeature support will be removed\n", + current->comm); + } error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL); if (error || !ovalue) diff --git a/kernel/panic.c b/kernel/panic.c index 80aed44e345a..8ed89a175d79 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -97,7 +97,7 @@ void panic(const char *fmt, ...) /* * Avoid nested stack-dumping if a panic occurs during oops processing */ - if (!oops_in_progress) + if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) dump_stack(); #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4603b9d8f30a..5c692a0a555d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1913,7 +1913,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { sched_info_switch(prev, next); - perf_event_task_sched_out(prev, next); + perf_event_task_sched(prev, next); fire_sched_out_preempt_notifiers(prev, next); prepare_lock_switch(rq, next); prepare_arch_switch(next); @@ -1956,13 +1956,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - local_irq_disable(); -#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ - perf_event_task_sched_in(prev, current); -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - local_irq_enable(); -#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ finish_lock_switch(rq, prev); finish_arch_post_lock_switch(); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 2cf9cc7aa103..a20dc8a3c949 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -1,6 +1,10 @@ # # Timer subsystem related configuration options # + +# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is +# only related to the tick functionality. Oneshot clockevent devices +# are supported independ of this. config TICK_ONESHOT bool diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index e883f57a3cd3..bf57abdc7bd0 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -575,10 +575,12 @@ void tick_broadcast_switch_to_oneshot(void) unsigned long flags; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + + tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; + if (cpumask_empty(tick_get_broadcast_mask())) goto end; - tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; bc = tick_broadcast_device.evtdev; if (bc) tick_broadcast_setup_oneshot(bc); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3526038f2836..6a3a5b9ff561 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -534,9 +534,9 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) hrtimer_get_expires(&ts->sched_timer), 0)) break; } - /* Update jiffies and reread time */ - tick_do_update_jiffies64(now); + /* Reread time and update jiffies */ now = ktime_get(); + tick_do_update_jiffies64(now); } } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0fa92f677c92..cf81f27ce6c6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2469,57 +2469,35 @@ static int ftrace_avail_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; - int ret; if (unlikely(ftrace_disabled)) return -ENODEV; - iter = kzalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) - return -ENOMEM; - - iter->pg = ftrace_pages_start; - iter->ops = &global_ops; - - ret = seq_open(file, &show_ftrace_seq_ops); - if (!ret) { - struct seq_file *m = file->private_data; - - m->private = iter; - } else { - kfree(iter); + iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); + if (iter) { + iter->pg = ftrace_pages_start; + iter->ops = &global_ops; } - return ret; + return iter ? 0 : -ENOMEM; } static int ftrace_enabled_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; - int ret; if (unlikely(ftrace_disabled)) return -ENODEV; - iter = kzalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) - return -ENOMEM; - - iter->pg = ftrace_pages_start; - iter->flags = FTRACE_ITER_ENABLED; - iter->ops = &global_ops; - - ret = seq_open(file, &show_ftrace_seq_ops); - if (!ret) { - struct seq_file *m = file->private_data; - - m->private = iter; - } else { - kfree(iter); + iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); + if (iter) { + iter->pg = ftrace_pages_start; + iter->flags = FTRACE_ITER_ENABLED; + iter->ops = &global_ops; } - return ret; + return iter ? 0 : -ENOMEM; } static void ftrace_filter_reset(struct ftrace_hash *hash) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index cf8d11e91efd..2d5eb3320827 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -449,6 +449,7 @@ struct ring_buffer_per_cpu { raw_spinlock_t reader_lock; /* serialize readers */ arch_spinlock_t lock; struct lock_class_key lock_key; + unsigned int nr_pages; struct list_head *pages; struct buffer_page *head_page; /* read from head */ struct buffer_page *tail_page; /* write to tail */ @@ -466,10 +467,12 @@ struct ring_buffer_per_cpu { unsigned long read_bytes; u64 write_stamp; u64 read_stamp; + /* ring buffer pages to update, > 0 to add, < 0 to remove */ + int nr_pages_to_update; + struct list_head new_pages; /* new pages to add */ }; struct ring_buffer { - unsigned pages; unsigned flags; int cpus; atomic_t record_disabled; @@ -963,14 +966,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) return 0; } -static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, - unsigned nr_pages) +static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu) { + int i; struct buffer_page *bpage, *tmp; - LIST_HEAD(pages); - unsigned i; - - WARN_ON(!nr_pages); for (i = 0; i < nr_pages; i++) { struct page *page; @@ -981,15 +980,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, */ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL | __GFP_NORETRY, - cpu_to_node(cpu_buffer->cpu)); + cpu_to_node(cpu)); if (!bpage) goto free_pages; - rb_check_bpage(cpu_buffer, bpage); + list_add(&bpage->list, pages); - list_add(&bpage->list, &pages); - - page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), + page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY, 0); if (!page) goto free_pages; @@ -997,6 +994,27 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, rb_init_page(bpage->page); } + return 0; + +free_pages: + list_for_each_entry_safe(bpage, tmp, pages, list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); + } + + return -ENOMEM; +} + +static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, + unsigned nr_pages) +{ + LIST_HEAD(pages); + + WARN_ON(!nr_pages); + + if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu)) + return -ENOMEM; + /* * The ring buffer page list is a circular list that does not * start and end with a list head. All page list items point to @@ -1005,20 +1023,15 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, cpu_buffer->pages = pages.next; list_del(&pages); + cpu_buffer->nr_pages = nr_pages; + rb_check_pages(cpu_buffer); return 0; - - free_pages: - list_for_each_entry_safe(bpage, tmp, &pages, list) { - list_del_init(&bpage->list); - free_buffer_page(bpage); - } - return -ENOMEM; } static struct ring_buffer_per_cpu * -rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) +rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; @@ -1052,7 +1065,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) INIT_LIST_HEAD(&cpu_buffer->reader_page->list); - ret = rb_allocate_pages(cpu_buffer, buffer->pages); + ret = rb_allocate_pages(cpu_buffer, nr_pages); if (ret < 0) goto fail_free_reader; @@ -1113,7 +1126,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, { struct ring_buffer *buffer; int bsize; - int cpu; + int cpu, nr_pages; /* keep it in its own cache line */ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), @@ -1124,14 +1137,14 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) goto fail_free_buffer; - buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); + nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); buffer->flags = flags; buffer->clock = trace_clock_local; buffer->reader_lock_key = key; /* need at least two pages */ - if (buffer->pages < 2) - buffer->pages = 2; + if (nr_pages < 2) + nr_pages = 2; /* * In case of non-hotplug cpu, if the ring-buffer is allocated @@ -1154,7 +1167,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, for_each_buffer_cpu(buffer, cpu) { buffer->buffers[cpu] = - rb_allocate_cpu_buffer(buffer, cpu); + rb_allocate_cpu_buffer(buffer, nr_pages, cpu); if (!buffer->buffers[cpu]) goto fail_free_buffers; } @@ -1276,6 +1289,18 @@ out: raw_spin_unlock_irq(&cpu_buffer->reader_lock); } +static void update_pages_handler(struct ring_buffer_per_cpu *cpu_buffer) +{ + if (cpu_buffer->nr_pages_to_update > 0) + rb_insert_pages(cpu_buffer, &cpu_buffer->new_pages, + cpu_buffer->nr_pages_to_update); + else + rb_remove_pages(cpu_buffer, -cpu_buffer->nr_pages_to_update); + cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update; + /* reset this value */ + cpu_buffer->nr_pages_to_update = 0; +} + /** * ring_buffer_resize - resize the ring buffer * @buffer: the buffer to resize. @@ -1285,14 +1310,12 @@ out: * * Returns -1 on failure. */ -int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) +int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, + int cpu_id) { struct ring_buffer_per_cpu *cpu_buffer; - unsigned nr_pages, rm_pages, new_pages; - struct buffer_page *bpage, *tmp; - unsigned long buffer_size; - LIST_HEAD(pages); - int i, cpu; + unsigned nr_pages; + int cpu; /* * Always succeed at resizing a non-existent buffer: @@ -1302,15 +1325,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); size *= BUF_PAGE_SIZE; - buffer_size = buffer->pages * BUF_PAGE_SIZE; /* we need a minimum of two pages */ if (size < BUF_PAGE_SIZE * 2) size = BUF_PAGE_SIZE * 2; - if (size == buffer_size) - return size; - atomic_inc(&buffer->record_disabled); /* Make sure all writers are done with this buffer. */ @@ -1321,68 +1340,56 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); - if (size < buffer_size) { - - /* easy case, just free pages */ - if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) - goto out_fail; - - rm_pages = buffer->pages - nr_pages; - + if (cpu_id == RING_BUFFER_ALL_CPUS) { + /* calculate the pages to update */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - rb_remove_pages(cpu_buffer, rm_pages); - } - goto out; - } - /* - * This is a bit more difficult. We only want to add pages - * when we can allocate enough for all CPUs. We do this - * by allocating all the pages and storing them on a local - * link list. If we succeed in our allocation, then we - * add these pages to the cpu_buffers. Otherwise we just free - * them all and return -ENOMEM; - */ - if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) - goto out_fail; + cpu_buffer->nr_pages_to_update = nr_pages - + cpu_buffer->nr_pages; - new_pages = nr_pages - buffer->pages; + /* + * nothing more to do for removing pages or no update + */ + if (cpu_buffer->nr_pages_to_update <= 0) + continue; - for_each_buffer_cpu(buffer, cpu) { - for (i = 0; i < new_pages; i++) { - struct page *page; /* - * __GFP_NORETRY flag makes sure that the allocation - * fails gracefully without invoking oom-killer and - * the system is not destabilized. + * to add pages, make sure all new pages can be + * allocated without receiving ENOMEM */ - bpage = kzalloc_node(ALIGN(sizeof(*bpage), - cache_line_size()), - GFP_KERNEL | __GFP_NORETRY, - cpu_to_node(cpu)); - if (!bpage) - goto free_pages; - list_add(&bpage->list, &pages); - page = alloc_pages_node(cpu_to_node(cpu), - GFP_KERNEL | __GFP_NORETRY, 0); - if (!page) - goto free_pages; - bpage->page = page_address(page); - rb_init_page(bpage->page); + INIT_LIST_HEAD(&cpu_buffer->new_pages); + if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update, + &cpu_buffer->new_pages, cpu)) + /* not enough memory for new pages */ + goto no_mem; } - } - for_each_buffer_cpu(buffer, cpu) { - cpu_buffer = buffer->buffers[cpu]; - rb_insert_pages(cpu_buffer, &pages, new_pages); - } + /* wait for all the updates to complete */ + for_each_buffer_cpu(buffer, cpu) { + cpu_buffer = buffer->buffers[cpu]; + if (cpu_buffer->nr_pages_to_update) { + update_pages_handler(cpu_buffer); + } + } + } else { + cpu_buffer = buffer->buffers[cpu_id]; + if (nr_pages == cpu_buffer->nr_pages) + goto out; - if (RB_WARN_ON(buffer, !list_empty(&pages))) - goto out_fail; + cpu_buffer->nr_pages_to_update = nr_pages - + cpu_buffer->nr_pages; + + INIT_LIST_HEAD(&cpu_buffer->new_pages); + if (cpu_buffer->nr_pages_to_update > 0 && + __rb_allocate_pages(cpu_buffer->nr_pages_to_update, + &cpu_buffer->new_pages, cpu_id)) + goto no_mem; + + update_pages_handler(cpu_buffer); + } out: - buffer->pages = nr_pages; put_online_cpus(); mutex_unlock(&buffer->mutex); @@ -1390,25 +1397,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) return size; - free_pages: - list_for_each_entry_safe(bpage, tmp, &pages, list) { - list_del_init(&bpage->list); - free_buffer_page(bpage); + no_mem: + for_each_buffer_cpu(buffer, cpu) { + struct buffer_page *bpage, *tmp; + cpu_buffer = buffer->buffers[cpu]; + /* reset this number regardless */ + cpu_buffer->nr_pages_to_update = 0; + if (list_empty(&cpu_buffer->new_pages)) + continue; + list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, + list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); + } } put_online_cpus(); mutex_unlock(&buffer->mutex); atomic_dec(&buffer->record_disabled); return -ENOMEM; - - /* - * Something went totally wrong, and we are too paranoid - * to even clean up the mess. - */ - out_fail: - put_online_cpus(); - mutex_unlock(&buffer->mutex); - atomic_dec(&buffer->record_disabled); - return -1; } EXPORT_SYMBOL_GPL(ring_buffer_resize); @@ -1510,7 +1516,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) * assign the commit to the tail. */ again: - max_count = cpu_buffer->buffer->pages * 100; + max_count = cpu_buffer->nr_pages * 100; while (cpu_buffer->commit_page != cpu_buffer->tail_page) { if (RB_WARN_ON(cpu_buffer, !(--max_count))) @@ -3588,9 +3594,18 @@ EXPORT_SYMBOL_GPL(ring_buffer_read); * ring_buffer_size - return the size of the ring buffer (in bytes) * @buffer: The ring buffer. */ -unsigned long ring_buffer_size(struct ring_buffer *buffer) +unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu) { - return BUF_PAGE_SIZE * buffer->pages; + /* + * Earlier, this method returned + * BUF_PAGE_SIZE * buffer->nr_pages + * Since the nr_pages field is now removed, we have converted this to + * return the per cpu buffer value. + */ + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages; } EXPORT_SYMBOL_GPL(ring_buffer_size); @@ -3765,8 +3780,11 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, !cpumask_test_cpu(cpu, buffer_b->cpumask)) goto out; + cpu_buffer_a = buffer_a->buffers[cpu]; + cpu_buffer_b = buffer_b->buffers[cpu]; + /* At least make sure the two buffers are somewhat the same */ - if (buffer_a->pages != buffer_b->pages) + if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) goto out; ret = -EAGAIN; @@ -3780,9 +3798,6 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, if (atomic_read(&buffer_b->record_disabled)) goto out; - cpu_buffer_a = buffer_a->buffers[cpu]; - cpu_buffer_b = buffer_b->buffers[cpu]; - if (atomic_read(&cpu_buffer_a->record_disabled)) goto out; @@ -4071,6 +4086,8 @@ static int rb_cpu_notify(struct notifier_block *self, struct ring_buffer *buffer = container_of(self, struct ring_buffer, cpu_notify); long cpu = (long)hcpu; + int cpu_i, nr_pages_same; + unsigned int nr_pages; switch (action) { case CPU_UP_PREPARE: @@ -4078,8 +4095,23 @@ static int rb_cpu_notify(struct notifier_block *self, if (cpumask_test_cpu(cpu, buffer->cpumask)) return NOTIFY_OK; + nr_pages = 0; + nr_pages_same = 1; + /* check if all cpu sizes are same */ + for_each_buffer_cpu(buffer, cpu_i) { + /* fill in the size from first enabled cpu */ + if (nr_pages == 0) + nr_pages = buffer->buffers[cpu_i]->nr_pages; + if (nr_pages != buffer->buffers[cpu_i]->nr_pages) { + nr_pages_same = 0; + break; + } + } + /* allocate minimum pages, user can later expand it */ + if (!nr_pages_same) + nr_pages = 2; buffer->buffers[cpu] = - rb_allocate_cpu_buffer(buffer, cpu); + rb_allocate_cpu_buffer(buffer, nr_pages, cpu); if (!buffer->buffers[cpu]) { WARN(1, "failed to allocate ring buffer on CPU %ld\n", cpu); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ed7b5d1e12f4..48ef4960ec90 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -87,18 +87,6 @@ static int tracing_disabled = 1; DEFINE_PER_CPU(int, ftrace_cpu_disabled); -static inline void ftrace_disable_cpu(void) -{ - preempt_disable(); - __this_cpu_inc(ftrace_cpu_disabled); -} - -static inline void ftrace_enable_cpu(void) -{ - __this_cpu_dec(ftrace_cpu_disabled); - preempt_enable(); -} - cpumask_var_t __read_mostly tracing_buffer_mask; /* @@ -629,7 +617,6 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) { int len; - void *ret; if (s->len <= s->readpos) return -EBUSY; @@ -637,9 +624,7 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) len = s->len - s->readpos; if (cnt > len) cnt = len; - ret = memcpy(buf, s->buffer + s->readpos, cnt); - if (!ret) - return -EFAULT; + memcpy(buf, s->buffer + s->readpos, cnt); s->readpos += cnt; return cnt; @@ -751,8 +736,6 @@ update_m |
