// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
*/
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
#include <linux/elf.h>
#include <linux/pagemap.h>
#include <linux/irq_work.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#define STACK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \
BPF_F_STACK_BUILD_ID)
struct stack_map_bucket {
struct pcpu_freelist_node fnode;
u32 hash;
u32 nr;
u64 data[];
};
struct bpf_stack_map {
struct bpf_map map;
void *elems;
struct pcpu_freelist freelist;
u32 n_buckets;
struct stack_map_bucket *buckets[];
};
/* irq_work to run up_read() for build_id lookup in nmi context */
struct stack_map_irq_work {
struct irq_work irq_work;
struct mm_struct *mm;
};
static void do_up_read(struct irq_work *entry)
{
struct stack_map_irq_work *work;
if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
return;
work = container_of(entry, struct stack_map_irq_work, irq_work);
mmap_read_unlock_non_owner(work->mm);
}
static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
static inline bool stack_map_use_build_id(struct bpf_map *map)
{
return (map->map_flags & BPF_F_STACK_BUILD_ID);
}
static inline int stack_map_data_size(struct bpf_map *map)
{
return stack_map_use_build_id(map) ?
sizeof(struct bpf_stack_build_id) : sizeof(u64);
}
static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
{
u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
int err;
smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
smap->map.numa_node);
if (!smap->elems)
return -ENOMEM;
err = pcpu_freelist_init(&smap->freelist);
if (err)
goto free_elems;
pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size,
smap->map.max_entries);
return 0;
free_elems:
bpf_map_area_free(smap->elems);
return err;
}
/* Called from syscall */
static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
u32 value_size = attr->value_size;
struct bpf_stack_map *smap;
struct bpf_map_memory mem;
u64 cost, n_buckets;
int err;
if (!bpf_capable())
return ERR_PTR(-EPERM);
if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
value_size < 8 || value_size % 8)
return ERR_PTR(-EINVAL);
BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
if (value_size % sizeof(struct bpf_stack_build_id) ||
value_size / sizeof(struct bpf_stack_build_id)
> sysctl_perf_event_max_stack)
return ERR_PTR(-EINVAL);
} else if (value_size / 8 > sysctl_perf_event_max_stack)
return ERR_PTR(-EINVAL);
/* hash table size must be power of 2 */
n_buckets = roundup_pow_of_two(attr->max_entries);
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
err = bpf_map_charge_init(&mem, cost);
if (err)
return ERR_PTR(err);
smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
if (!smap) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}
bpf_map_init_from_attr(&smap->map, attr);
smap->map.value_size = value_size;
smap->n_buckets = n_buckets;
err = get_callchain_buffers(sysctl_perf_event_max_stack);
if (err)
goto free_charge;
err = prealloc_elems_and_freelist(smap);
if (err)
goto put_buffers;
bpf_map_charge_move(&smap->map.memory, &mem);
return &smap->map;
put_buffers:
put_callchain_buffers();
free_charge:
bpf_map_charge_finish(&mem);
bpf_map_area_free(smap);
return ERR_PTR(err);
}
#define BPF_BUILD_ID 3
/*
* Parse build id from the note segment. This logic can be shared between
* 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
* identical.
*/
static inline int stack_map_parse_build_id(void *page_addr,
unsigned char *build_id,
void *note_start,
Elf32_Word note_size)
{
Elf32_Word note_offs = 0, new_offs;
/* check for overflow */
if (note_start < page_addr || note_start + note_size < note_start)
return -