// SPDX-License-Identifier: GPL-2.0
/*
* Performance events ring-buffer code:
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
#include <linux/perf_event.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/circ_buf.h>
#include <linux/poll.h>
#include <linux/nospec.h>
#include "internal.h"
static void perf_output_wakeup(struct perf_output_handle *handle)
{
atomic_set(&handle->rb->poll, EPOLLIN | EPOLLRDNORM);
handle->event->pending_wakeup = 1;
if (*perf_event_fasync(handle->event) && !handle->event->pending_kill)
handle->event->pending_kill = POLL_IN;
irq_work_queue(&handle->event->pending_irq);
}
/*
* We need to ensure a later event_id doesn't publish a head when a former
* event isn't done writing. However since we need to deal with NMIs we
* cannot fully serialize things.
*
* We only publish the head (and generate a wakeup) when the outer-most
* event completes.
*/
static void perf_output_get_handle(struct perf_output_handle *handle)
{
struct perf_buffer *rb = handle->rb;
preempt_disable();
/*
* Avoid an explicit LOAD/STORE such that architectures with memops
* can use them.
*/
(*(volatile unsigned int *)&rb->nest)++;
handle->wakeup = local_read(&rb->wakeup);
}
static void perf_output_put_handle(struct perf_output_handle *handle)
{
struct perf_buffer *rb = handle->rb;
unsigned long head;
unsigned int nest;
/*
* If this isn't the outermost nesting, we don't have to update
* @rb->user_page->data_head.
*/
nest = READ_ONCE(rb->nest);
if (nest > 1) {
WRITE_ONCE(rb->nest, nest - 1);
goto out;
}
again:
/*
* In order to avoid publishing a head value that goes backwards,
* we must ensure the load of @rb->head happens after we've
* incremented @rb->nest.
*
* Otherwise we can observe a @rb->head value before one published
* by an IRQ/NMI happening between the load and the increment.
*/
barrier();
head = local_read(&rb->head);
/*
* IRQ/NMI can happen here and advance @rb->head, causing our
* load above to be stale.
*/
/*
* Since the mmap() consumer (userspace) can run on a different CPU:
*
* kernel user
*
* if (LOAD ->data_tail) { LOAD ->data_head
* (A) smp_rmb() (C)
* STORE $data LOAD $data
* smp_wmb() (B) smp_mb() (D)
* STORE ->data_head STORE ->data_tail
* }
*
* Where A pairs with D, and B pairs with C.
*
* In our case (A) is a control dependency that separates the load of
* the ->data_tail and the stores of $data. In case ->data_tail
* indicates there is no room in the buffer to store $data we do not.
*
* D needs to be a full barrier since it separates the data READ
* from the tail WRITE.
*
* For B a WMB is sufficient since it separates two WRITEs, and for C
* an RMB is sufficient since it separates two READs.
*
* See perf_output_begin().
*/
smp_wmb(); /* B, matches C */
WRITE_ONCE(rb->user_page->data_head, head);
/*
* We must publish the head before decrementing the nest count,
* otherwise an IRQ/NMI can publish a more recent head value and our
* write will (temporarily) publish a stale value.
*/
barrier();
WRITE_ONCE(rb->nest, 0);
/*
* Ensure we decrement @rb->nest before we validate the @rb->head.
* Otherwise we cannot be sure we caught the 'last' nested update.
*/
barrier();
if (unlikely(head != local_read(&rb->head))) {
WRITE_ONCE(rb->nest, 1);
goto again;
}
if (handle->wakeup != local_read(&rb->wakeup))
perf_output_wakeup(handle);
out:
preempt_enable();
}
static __always_inline bool
ring_buffer_has_space(unsigned long head, unsigned long tail,
unsigned long data_size, unsigned int size,
bool backward)
{
if (!backward)
return CIRC_SPACE(head, tail, data_size) >= size;
else
return CIRC_SPACE(tail, head, data_size) >= size;
}
static __always_inline int
__perf_output_begin(struct perf_output_handle *handle,
struct perf_sample_data *data,
struct perf_event *event, unsigned int size,
bool backward)
{
struct perf_buffer *rb;
unsigned long tail, offset, head;
int have_lost, page_shift;
struct {
struct perf_event_header header;
u64 id;
u64 lost;
} lost_event;
rcu_read_lock();
/*
* For inherited events we send all the output towards the parent.
*/
if (event->parent)
event = event->parent;
rb = rcu_dereference(event->rb);
if (unlikely(!rb))
goto out;
if (unlikely(rb->paused)) {
if (rb->nr_pages) {
local_inc(&rb->lost);
atomic64_inc(&event->lost_samples);
}
goto out;
}
handle->rb = rb;
handle->event = event;
handle->flags = 0;