aboutsummaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-06-17 09:18:14 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2026-06-17 09:18:14 +0100
commit9c87e61e3c5797277407ba5eae4eac8a52be3fa3 (patch)
treee3f902cb5363b5b90ab74a4b7e26fafbc15aaeaf /include/linux
parentb85966adbf5de0668a815c6e3527f87e0c387fb4 (diff)
parente4287bf34f97a88c7d9322f5bde828724c073a6b (diff)
Merge tag 'bpf-next-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov: "Major changes: - Recover from BPF arena page faults using a scratch page and add ptep_try_set() for lockless empty-slot installs on x86 and arm64. This allows BPF kfuncs to access arena pointers directly. The 'arena_direct_access' stable branch was created for this work and was pulled into sched-ext and bpf-next trees (Tejun Heo, Kumar Kartikeya Dwivedi) - Lift old restriction and support 6+ arguments in BPF programs and kfuncs on x86 and arm64 (Yonghong Song, Puranjay Mohan) Other features and fixes: - Add 24-bit BTF vlen and reclaim unused bits in the BTF UAPI to ease addition of new BTF kinds (Alan Maguire) - Raise the maximum BPF call chain depth from 8 to 16 frames (Alexei Starovoitov) - Refactor object relationship tracking in the verifier and fix a dynptr use-after-free bug (Amery Hung) - Harden the signed program loader and reject exclusive maps as inner maps (Daniel Borkmann) - Replace the verifier min/max bounds fields with a circular number (cnum) representation and improve 32->64 bit range refinements (Eduard Zingerman) - Introduce the arena library and runtime (libarena) with a buddy allocator, rbtree and SPMC queue data structures, ASAN support and a parallel test harness. Allow subprograms to return arena pointers and switch to a BTF type-tag based __arena annotation (Emil Tsalapatis) - Cache build IDs in the sleepable stackmap path and avoid faultable build ID reads under mm locks (Ihor Solodrai) - Introduce the tracing_multi link to attach a single BPF program to many kernel functions at once. Allow specifying the uprobe_multi target via FD (Jiri Olsa) - Extend the bpf_list family of kfuncs with bpf_list_add/del(), and bpf_list_is_first/is_last/empty() (Kaitao Cheng) - Extend the BPF syscall with common attributes support for prog_load, btf_load and map_create (Leon Hwang) - Wrap rhashtable as BPF map (Mykyta Yatsenko, Herbert Xu) - Add sleepable support for tracepoint programs and fix deadlocks in LRU map due to NMI reentry (Mykyta Yatsenko) - Fix OOB access in bpf_flow_keys, fix nullness analysis of inner arrays, enforce write checks for global subprograms (Nuoqi Gui) - Report the maximum combined stack depth and print a breakdown of instructions processed per subprogram (Paul Chaignon) - Add an XDP load-balancer benchmark and arm64 JIT support for stack arguments (Puranjay Mohan) - Add kfuncs to traverse over wakeup_sources (Samuel Wu) - Allow sleepable BPF programs to use LPM trie maps directly (Vlad Poenaru) - Many more fixes and cleanups across the verifier, BTF, sockmap, devmap, bpffs, security hooks, s390/riscv/loongarch JITs, rqspinlock, libbpf, bpftool, selftests" * tag 'bpf-next-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (336 commits) selftests/bpf: Work around llvm stack overflow in crypto progs selftests/bpf: add test for bpf_msg_pop_data() overflow bpf, sockmap: fix integer overflow in bpf_msg_pop_data() bounds check sockmap: Fix use-after-free in udp_bpf_recvmsg() bpf, sockmap: keep sk_msg copy state in sync bpf, sockmap: Fix wrong rsge offset in bpf_msg_push_data() bpf, sockmap: reject overflowing copy + len in bpf_msg_push_data() selftsets/bpf: Retry map update on helper_fill_hashmap() selftests/bpf: Add test for sleepable lsm_cgroup rejection selftests/bpf: Add test to verify the fix for bpf_setsockopt() helper bpf: Fix bpf_get/setsockopt to tos for ipv4-mapped ipv6 socket selftests/bpf: Avoid static LLVM linking for cross builds selftests/bpf: Use common CFLAGS for urandom_read selftests/bpf: Initialize operation name before use tools/bpf: build: Append extra cflags libbpf: Initialize CFLAGS before including Makefile.include bpftool: Append extra host flags bpftool: Avoid adding EXTRA_CFLAGS to HOST_CFLAGS bpftool: Pass host flags to bootstrap libbpf selftests/bpf: correct CONFIG_PPC64 macro name in comment ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/bpf-cgroup.h5
-rw-r--r--include/linux/bpf.h282
-rw-r--r--include/linux/bpf_defs.h19
-rw-r--r--include/linux/bpf_lsm.h6
-rw-r--r--include/linux/bpf_types.h2
-rw-r--r--include/linux/bpf_verifier.h301
-rw-r--r--include/linux/btf.h7
-rw-r--r--include/linux/btf_ids.h1
-rw-r--r--include/linux/cnum.h82
-rw-r--r--include/linux/filter.h27
-rw-r--r--include/linux/ftrace.h9
-rw-r--r--include/linux/pgtable.h43
-rw-r--r--include/linux/rhashtable.h4
-rw-r--r--include/linux/syscalls.h3
-rw-r--r--include/linux/trace_events.h12
15 files changed, 656 insertions, 147 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index b2e79c2b41d5..4d0cc65976a1 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -421,7 +421,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr,
enum bpf_prog_type ptype);
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int cgroup_bpf_prog_query(const union bpf_attr *attr,
- union bpf_attr __user *uattr);
+ union bpf_attr __user *uattr, u32 uattr_size);
const struct bpf_func_proto *
cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -452,7 +452,8 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
}
static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
- union bpf_attr __user *uattr)
+ union bpf_attr __user *uattr,
+ u32 uattr_size)
{
return -EINVAL;
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 64efc3fdb716..7719f6528445 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -6,6 +6,7 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/filter.h>
+#include <linux/bpf_defs.h>
#include <crypto/sha2.h>
#include <linux/workqueue.h>
@@ -32,6 +33,8 @@
#include <linux/memcontrol.h>
#include <linux/cfi.h>
#include <linux/xattr.h>
+#include <linux/key.h>
+#include <linux/ftrace.h>
#include <asm/rqspinlock.h>
struct bpf_verifier_env;
@@ -111,7 +114,7 @@ struct bpf_map_ops {
long (*map_pop_elem)(struct bpf_map *map, void *value);
long (*map_peek_elem)(struct bpf_map *map, void *value);
void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu);
- int (*map_get_hash)(struct bpf_map *map, u32 hash_buf_size, void *hash_buf);
+ int (*map_get_hash)(struct bpf_map *map);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -296,6 +299,7 @@ struct bpf_map_owner {
struct bpf_map {
u8 sha[SHA256_DIGEST_SIZE];
+ u32 excl;
const struct bpf_map_ops *ops;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
@@ -489,6 +493,35 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f
return rec->field_mask & type;
}
+static inline bool btf_field_is_nmi_safe(enum btf_field_type type)
+{
+ switch (type) {
+ case BPF_SPIN_LOCK:
+ case BPF_RES_SPIN_LOCK:
+ case BPF_TIMER:
+ case BPF_WORKQUEUE:
+ case BPF_TASK_WORK:
+ case BPF_KPTR_UNREF:
+ case BPF_REFCOUNT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool btf_record_has_nmi_unsafe_fields(const struct btf_record *rec)
+{
+ int i;
+
+ if (IS_ERR_OR_NULL(rec))
+ return false;
+ for (i = 0; i < rec->cnt; i++) {
+ if (!btf_field_is_nmi_safe(rec->fields[i].type))
+ return true;
+ }
+ return false;
+}
+
static inline void bpf_obj_init(const struct btf_record *rec, void *obj)
{
int i;
@@ -618,6 +651,8 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
struct bpf_spin_lock *spin_lock);
u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena);
u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena);
+u64 bpf_arena_map_kern_vm_start(struct bpf_map *map);
+struct bpf_map *bpf_prog_arena(struct bpf_prog *prog);
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
struct bpf_offload_dev;
@@ -679,6 +714,8 @@ int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt, int node_id,
u64 flags);
void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt);
+void *bpf_arena_alloc_pages_sleepable(void *p__map, void *addr__ign, u32 page_cnt, int node_id,
+ u64 flags);
#else
static inline void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt,
int node_id, u64 flags)
@@ -689,6 +726,12 @@ static inline void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr
static inline void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt)
{
}
+
+static inline void *bpf_arena_alloc_pages_sleepable(void *p__map, void *addr__ign, u32 page_cnt,
+ int node_id, u64 flags)
+{
+ return NULL;
+}
#endif
extern const struct bpf_map_ops bpf_map_offload_ops;
@@ -1052,7 +1095,7 @@ struct bpf_insn_access_aux {
struct {
struct btf *btf;
u32 btf_id;
- u32 ref_obj_id;
+ u32 ref_id;
};
};
struct bpf_verifier_log *log; /* for verbose logs */
@@ -1152,6 +1195,11 @@ struct bpf_prog_offload {
/* The longest tracepoint has 12 args.
* See include/trace/bpf_probe.h
+ *
+ * Also reuse this macro for maximum number of arguments a BPF function
+ * or a kfunc can have. Args 1-5 are passed in registers, args 6-12 via
+ * stack arg slots. The JIT may map some stack arg slots to registers based
+ * on the native calling convention (e.g., arg 6 to R9 on x86-64).
*/
#define MAX_BPF_FUNC_ARGS 12
@@ -1234,9 +1282,9 @@ enum {
#define BPF_TRAMP_COOKIE_INDEX_SHIFT 8
#define BPF_TRAMP_IS_RETURN_SHIFT 63
-struct bpf_tramp_links {
- struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS];
- int nr_links;
+struct bpf_tramp_nodes {
+ struct bpf_tramp_node *nodes[BPF_MAX_TRAMP_LINKS];
+ int nr_nodes;
};
struct bpf_tramp_run_ctx;
@@ -1264,13 +1312,13 @@ struct bpf_tramp_run_ctx;
struct bpf_tramp_image;
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr);
void *arch_alloc_bpf_trampoline(unsigned int size);
void arch_free_bpf_trampoline(void *image, unsigned int size);
int __must_check arch_protect_bpf_trampoline(void *image, unsigned int size);
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr);
+ struct bpf_tramp_nodes *tnodes, void *func_addr);
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
@@ -1336,8 +1384,6 @@ struct bpf_trampoline {
/* hlist for trampoline_ip_table */
struct hlist_node hlist_ip;
struct ftrace_ops *fops;
- /* serializes access to fields of this trampoline */
- struct mutex mutex;
refcount_t refcnt;
u32 flags;
u64 key;
@@ -1358,6 +1404,11 @@ struct bpf_trampoline {
int progs_cnt[BPF_TRAMP_MAX];
/* Executable image of trampoline */
struct bpf_tramp_image *cur_image;
+ /* Used as temporary old image storage for multi_attach */
+ struct {
+ struct bpf_tramp_image *old_image;
+ u32 old_flags;
+ } multi_attach;
};
struct bpf_attach_target_info {
@@ -1455,11 +1506,13 @@ static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u6
return 0;
}
+struct bpf_tracing_multi_link;
+
#ifdef CONFIG_BPF_JIT
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+int bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog);
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+int bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog);
struct bpf_trampoline *bpf_trampoline_get(u64 key,
@@ -1467,6 +1520,11 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
void bpf_trampoline_put(struct bpf_trampoline *tr);
int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs);
+int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids,
+ struct bpf_tracing_multi_link *link);
+int bpf_trampoline_multi_detach(struct bpf_prog *prog,
+ struct bpf_tracing_multi_link *link);
+
/*
* When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn
* indirection with a direct call to the bpf program. If the architecture does
@@ -1544,14 +1602,15 @@ void bpf_jit_uncharge_modmem(u32 size);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
bool bpf_insn_is_indirect_target(const struct bpf_verifier_env *env, const struct bpf_prog *prog,
int insn_idx);
+u16 bpf_out_stack_arg_cnt(const struct bpf_verifier_env *env, const struct bpf_prog *prog);
#else
-static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+static inline int bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
return -ENOTSUPP;
}
-static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
@@ -1578,6 +1637,16 @@ static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
{
return false;
}
+static inline int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids,
+ struct bpf_tracing_multi_link *link)
+{
+ return -ENOTSUPP;
+}
+static inline int bpf_trampoline_multi_detach(struct bpf_prog *prog,
+ struct bpf_tracing_multi_link *link)
+{
+ return -ENOTSUPP;
+}
#endif
struct bpf_func_info_aux {
@@ -1615,7 +1684,7 @@ struct bpf_ctx_arg_aux {
enum bpf_reg_type reg_type;
struct btf *btf;
u32 btf_id;
- u32 ref_obj_id;
+ u32 ref_id;
bool refcounted;
};
@@ -1657,6 +1726,19 @@ struct bpf_stream_stage {
int len;
};
+enum bpf_sig_verdict {
+ BPF_SIG_UNSIGNED = 0,
+ BPF_SIG_VERIFIED,
+};
+
+enum bpf_sig_keyring {
+ BPF_SIG_KEYRING_NONE = 0,
+ BPF_SIG_KEYRING_BUILTIN,
+ BPF_SIG_KEYRING_SECONDARY,
+ BPF_SIG_KEYRING_PLATFORM,
+ BPF_SIG_KEYRING_USER,
+};
+
struct bpf_prog_aux {
atomic64_t refcnt;
u32 used_map_cnt;
@@ -1699,6 +1781,11 @@ struct bpf_prog_aux {
bool changes_pkt_data;
bool might_sleep;
bool kprobe_write_ctx;
+ struct {
+ s32 keyring_serial;
+ u8 keyring_type;
+ u8 verdict;
+ } sig;
u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
struct bpf_arena *arena;
@@ -1731,6 +1818,7 @@ struct bpf_prog_aux {
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
char name[BPF_OBJ_NAME_LEN];
u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64);
+ u16 stack_arg_sp_adjust;
#ifdef CONFIG_SECURITY
void *security;
#endif
@@ -1874,12 +1962,17 @@ struct bpf_link_ops {
__poll_t (*poll)(struct file *file, struct poll_table_struct *pts);
};
-struct bpf_tramp_link {
- struct bpf_link link;
+struct bpf_tramp_node {
+ struct bpf_link *link;
struct hlist_node tramp_hlist;
u64 cookie;
};
+struct bpf_tramp_link {
+ struct bpf_link link;
+ struct bpf_tramp_node node;
+};
+
struct bpf_shim_tramp_link {
struct bpf_tramp_link link;
struct bpf_trampoline *trampoline;
@@ -1887,13 +1980,31 @@ struct bpf_shim_tramp_link {
struct bpf_tracing_link {
struct bpf_tramp_link link;
+ struct bpf_tramp_node fexit;
struct bpf_trampoline *trampoline;
struct bpf_prog *tgt_prog;
};
-struct bpf_fsession_link {
- struct bpf_tracing_link link;
- struct bpf_tramp_link fexit;
+struct bpf_tracing_multi_node {
+ struct bpf_tramp_node node;
+ struct bpf_trampoline *trampoline;
+ struct ftrace_func_entry entry;
+};
+
+struct bpf_tracing_multi_data {
+ struct ftrace_hash *unreg;
+ struct ftrace_hash *modify;
+ struct ftrace_hash *reg;
+ struct ftrace_func_entry *entry;
+};
+
+struct bpf_tracing_multi_link {
+ struct bpf_link link;
+ struct bpf_tracing_multi_data data;
+ u64 *cookies;
+ struct bpf_tramp_node *fexits;
+ int nodes_cnt;
+ struct bpf_tracing_multi_node nodes[] __counted_by(nodes_cnt);
};
struct bpf_raw_tp_link {
@@ -2079,6 +2190,12 @@ static inline void bpf_prog_put_recursion_context(struct bpf_prog *prog)
#endif
}
+static inline bool is_tracing_multi(enum bpf_attach_type type)
+{
+ return type == BPF_TRACE_FENTRY_MULTI || type == BPF_TRACE_FEXIT_MULTI ||
+ type == BPF_TRACE_FSESSION_MULTI;
+}
+
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
/* This macro helps developer to register a struct_ops type and generate
* type information correctly. Developers should use this macro to register
@@ -2099,8 +2216,8 @@ void bpf_struct_ops_put(const void *kdata);
int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff);
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
void *value);
-int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
- struct bpf_tramp_link *link,
+int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_nodes *tnodes,
+ struct bpf_tramp_node *node,
const struct btf_func_model *model,
void *stub_func,
void **image, u32 *image_off,
@@ -2125,6 +2242,9 @@ int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map);
void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog);
void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux);
u32 bpf_struct_ops_id(const void *kdata);
+int bpf_struct_ops_for_each_prog(const void *kdata,
+ int (*cb)(struct bpf_prog *prog, void *data),
+ void *data);
#ifdef CONFIG_NET
/* Define it here to avoid the use of forward declaration */
@@ -2192,31 +2312,33 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
#endif
-static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
+static inline int bpf_fsession_cnt(struct bpf_tramp_nodes *nodes)
{
- struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes fentries = nodes[BPF_TRAMP_FENTRY];
int cnt = 0;
- for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
- if (fentries.links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION)
+ for (int i = 0; i < nodes[BPF_TRAMP_FENTRY].nr_nodes; i++) {
+ if (fentries.nodes[i]->link->prog->expected_attach_type == BPF_TRACE_FSESSION)
+ cnt++;
+ if (fentries.nodes[i]->link->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)
cnt++;
}
return cnt;
}
-static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_link *link)
+static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_node *node)
{
- return link->link.prog->call_session_cookie;
+ return node->link->prog->call_session_cookie;
}
-static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links)
+static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_nodes *nodes)
{
- struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes fentries = nodes[BPF_TRAMP_FENTRY];
int cnt = 0;
- for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
- if (bpf_prog_calls_session_cookie(fentries.links[i]))
+ for (int i = 0; i < nodes[BPF_TRAMP_FENTRY].nr_nodes; i++) {
+ if (bpf_prog_calls_session_cookie(fentries.nodes[i]))
cnt++;
}
@@ -2598,6 +2720,7 @@ bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *r
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
+void bpf_obj_cancel_fields(struct bpf_map *map, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
@@ -2764,6 +2887,9 @@ void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
const struct bpf_link_ops *ops, struct bpf_prog *prog,
enum bpf_attach_type attach_type, bool sleepable);
+void bpf_tramp_link_init(struct bpf_tramp_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type, u64 cookie);
int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
int bpf_link_settle(struct bpf_link_primer *primer);
void bpf_link_cleanup(struct bpf_link_primer *primer);
@@ -2917,7 +3043,9 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
size_t actual_size);
/* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size);
+struct bpf_log_attr;
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr,
+ struct bpf_log_attr *attr_log);
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
int bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
@@ -3088,6 +3216,56 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip);
+static __always_inline u32
+bpf_prog_run_array_sleepable(const struct bpf_prog_array *array,
+ const void *ctx, bpf_prog_run_fn run_prog)
+{
+ const struct bpf_prog_array_item *item;
+ struct bpf_prog *prog;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_trace_run_ctx run_ctx;
+ u32 ret = 1;
+
+ if (unlikely(!array))
+ return ret;
+
+ migrate_disable();
+
+ run_ctx.is_uprobe = false;
+
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ item = &array->items[0];
+ while ((prog = READ_ONCE(item->prog))) {
+ /* Skip dummy_bpf_prog placeholder (len == 0) */
+ if (unlikely(!prog->len)) {
+ item++;
+ continue;
+ }
+
+ if (unlikely(!bpf_prog_get_recursion_context(prog))) {
+ bpf_prog_inc_misses_counter(prog);
+ bpf_prog_put_recursion_context(prog);
+ item++;
+ continue;
+ }
+
+ run_ctx.bpf_cookie = item->bpf_cookie;
+
+ if (!prog->sleepable) {
+ guard(rcu)();
+ ret &= run_prog(prog, ctx);
+ } else {
+ ret &= run_prog(prog, ctx);
+ }
+
+ bpf_prog_put_recursion_context(prog);
+ item++;
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+ migrate_enable();
+ return ret;
+}
+
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -3135,6 +3313,12 @@ static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_
{
}
+static inline void bpf_tramp_link_init(struct bpf_tramp_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type, u64 cookie)
+{
+}
+
static inline int bpf_link_prime(struct bpf_link *link,
struct bpf_link_primer *primer)
{
@@ -3626,15 +3810,25 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
#endif /* CONFIG_BPF_SYSCALL */
#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */
-#if defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL)
+#ifdef CONFIG_KEYS
+struct bpf_key {
+ struct key *key;
+ bool has_ref;
+};
+#endif /* CONFIG_KEYS */
+#if defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL)
struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags);
struct bpf_key *bpf_lookup_system_key(u64 id);
void bpf_key_put(struct bpf_key *bkey);
-int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
- struct bpf_dynptr *sig_p,
+int bpf_verify_pkcs7_signature(const struct bpf_dynptr *data_p,
+ const struct bpf_dynptr *sig_p,
struct bpf_key *trusted_keyring);
+static inline s32 bpf_key_serial(const struct bpf_key *key)
+{
+ return key->has_ref ? key->key->serial : 0;
+}
#else
static inline struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
{
@@ -3650,12 +3844,17 @@ static inline void bpf_key_put(struct bpf_key *bkey)
{
}
-static inline int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
- struct bpf_dynptr *sig_p,
+static inline int bpf_verify_pkcs7_signature(const struct bpf_dynptr *data_p,
+ const struct bpf_dynptr *sig_p,
struct bpf_key *trusted_keyring)
{
return -EOPNOTSUPP;
}
+
+static inline s32 bpf_key_serial(const struct bpf_key *key)
+{
+ return 0;
+}
#endif /* defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL) */
/* verifier prototypes for helper functions called from eBPF programs */
@@ -3931,15 +4130,6 @@ static inline void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) {}
static inline void bpf_cgroup_atype_put(int cgroup_atype) {}
#endif /* CONFIG_BPF_LSM */
-struct key;
-
-#ifdef CONFIG_KEYS
-struct bpf_key {
- struct key *key;
- bool has_ref;
-};
-#endif /* CONFIG_KEYS */
-
static inline bool type_is_alloc(u32 type)
{
return type & MEM_ALLOC;
diff --git a/include/linux/bpf_defs.h b/include/linux/bpf_defs.h
new file mode 100644
index 000000000000..2185cd3966d4
--- /dev/null
+++ b/include/linux/bpf_defs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Subset of bpf.h declarations, split out so files that need only these
+ * declarations can avoid bpf.h's full include cost.
+ */
+#ifndef _LINUX_BPF_DEFS_H
+#define _LINUX_BPF_DEFS_H
+
+#ifdef CONFIG_BPF_SYSCALL
+bool bpf_arena_handle_page_fault(unsigned long addr, bool is_write, unsigned long fault_ip);
+#else
+static inline bool bpf_arena_handle_page_fault(unsigned long addr, bool is_write,
+ unsigned long fault_ip)
+{
+ return false;
+}
+#endif
+
+#endif /* _LINUX_BPF_DEFS_H */
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 643809cc78c3..143775a27a2a 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -52,6 +52,7 @@ int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str,
const struct bpf_dynptr *value_p, int flags);
int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str);
bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog);
+bool bpf_lsm_hook_returns_errno(u32 btf_id);
#else /* !CONFIG_BPF_LSM */
@@ -104,6 +105,11 @@ static inline bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog)
{
return false;
}
+
+static inline bool bpf_lsm_hook_returns_errno(u32 btf_id)
+{
+ return true;
+}
#endif /* CONFIG_BPF_LSM */
#endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index b13de31e163f..e5906829aa6f 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -134,6 +134,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_INSN_ARRAY, insn_array_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_RHASH, rhtab_map_ops)
BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
@@ -155,3 +156,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf)
BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi)
BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops)
BPF_LINK_TYPE(BPF_LINK_TYPE_UPROBE_MULTI, uprobe_multi)
+BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING_MULTI, tracing_multi)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 185b2aa43a42..39a851e690ec 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -8,6 +8,7 @@
#include <linux/btf.h> /* for struct btf and btf_id() */
#include <linux/filter.h> /* for MAX_BPF_STACK */
#include <linux/tnum.h>
+#include <linux/cnum.h>
/* Maximum variable offset umax_value permitted when resolving memory accesses.
* In practice this is far bigger than any realistic pointer offset; this limit
@@ -65,7 +66,6 @@ struct bpf_reg_state {
struct { /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */
u32 mem_size;
- u32 dynptr_id; /* for dynptr slices */
};
/* For dynptr stack slots */
@@ -120,14 +120,8 @@ struct bpf_reg_state {
* These refer to the same value as var_off, not necessarily the actual
* contents of the register.
*/
- s64 smin_value; /* minimum possible (s64)value */
- s64 smax_value; /* maximum possible (s64)value */
- u64 umin_value; /* minimum possible (u64)value */
- u64 umax_value; /* maximum possible (u64)value */
- s32 s32_min_value; /* minimum possible (s32)value */
- s32 s32_max_value; /* maximum possible (s32)value */
- u32 u32_min_value; /* minimum possible (u32)value */
- u32 u32_max_value; /* maximum possible (u32)value */
+ struct cnum64 r64; /* 64-bit range as circular number */
+ struct cnum32 r32; /* 32-bit range as circular number */
/* For PTR_TO_PACKET, used to find other pointers with the same variable
* offset, so they can share range knowledge.
* For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
@@ -153,46 +147,14 @@ struct bpf_reg_state {
#define BPF_ADD_CONST32 (1U << 30)
#define BPF_ADD_CONST (BPF_ADD_CONST64 | BPF_ADD_CONST32)
u32 id;
- /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
- * from a pointer-cast helper, bpf_sk_fullsock() and
- * bpf_tcp_sock().
- *
- * Consider the following where "sk" is a reference counted
- * pointer returned from "sk = bpf_sk_lookup_tcp();":
- *
- * 1: sk = bpf_sk_lookup_tcp();
- * 2: if (!sk) { return 0; }
- * 3: fullsock = bpf_sk_fullsock(sk);
- * 4: if (!fullsock) { bpf_sk_release(sk); return 0; }
- * 5: tp = bpf_tcp_sock(fullsock);
- * 6: if (!tp) { bpf_sk_release(sk); return 0; }
- * 7: bpf_sk_release(sk);
- * 8: snd_cwnd = tp->snd_cwnd; // verifier will complain
- *
- * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and
- * "tp" ptr should be invalidated also. In order to do that,
- * the reg holding "fullsock" and "sk" need to remember
- * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id
- * such that the verifier can reset all regs which have
- * ref_obj_id matching the sk_reg->id.
- *
- * sk_reg->ref_obj_id is set to sk_reg->id at line 1.
- * sk_reg->id will stay as NULL-marking purpose only.
- * After NULL-marking is done, sk_reg->id can be reset to 0.
- *
- * After "fullsock = bpf_sk_fullsock(sk);" at line 3,
- * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id.
- *
- * After "tp = bpf_tcp_sock(fullsock);" at line 5,
- * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id
- * which is the same as sk_reg->ref_obj_id.
- *
- * From the verifier perspective, if sk, fullsock and tp
- * are not NULL, they are the same ptr with different
- * reg->type. In particular, bpf_sk_release(tp) is also
- * allowed and has the same effect as bpf_sk_release(sk).
+ /*
+ * Tracks the parent object this register was derived from.
+ * Used for cascading invalidation: when the parent object is
+ * released or invalidated, all registers with matching parent_id
+ * are also invalidated. For example, a slice from bpf_dynptr_data()
+ * gets parent_id set to the dynptr's id.
*/
- u32 ref_obj_id;
+ u32 parent_id;
/* Inside the callee two registers can be both PTR_TO_STACK like
* R1=fp-8 and R2=fp-8, but one of them points to this function stack
* while another to the caller's stack. To differentiate them 'frameno'
@@ -209,6 +171,66 @@ struct bpf_reg_state {
bool precise;
};
+static inline s64 reg_smin(const struct bpf_reg_state *reg)
+{
+ return cnum64_smin(reg->r64);
+}
+
+static inline s64 reg_smax(const struct bpf_reg_state *reg)
+{
+ return cnum64_smax(reg->r64);
+}
+
+static inline u64 reg_umin(const struct bpf_reg_state *reg)
+{
+ return cnum64_umin(reg->r64);
+}
+
+static inline u64 reg_umax(const struct bpf_reg_state *reg)
+{
+ return cnum64_umax(reg->r64);
+}
+
+static inline s32 reg_s32_min(const struct bpf_reg_state *reg)
+{
+ return cnum32_smin(reg->r32);
+}
+
+static inline s32 reg_s32_max(const struct bpf_reg_state *reg)
+{
+ return cnum32_smax(reg->r32);
+}
+
+static inline u32 reg_u32_min(const struct bpf_reg_state *reg)
+{
+ return cnum32_umin(reg->r32);
+}
+
+static inline u32 reg_u32_max(const struct bpf_reg_state *reg)
+{
+ return cnum32_umax(reg->r32);
+}
+
+static inline void reg_set_srange32(struct bpf_reg_state *reg, s32 smin, s32 smax)
+{
+ reg->r32 = cnum32_from_srange(smin, smax);
+}
+
+static inline void reg_set_urange32(struct bpf_reg_state *reg, u32 umin, u32 umax)
+{
+ reg->r32 = cnum32_from_urange(umin, umax);
+}
+
+static inline void reg_set_srange64(struct bpf_reg_state *reg, s64 smin, s64 smax)
+{
+ reg->r64 = cnum64_from_srange(smin, smax);
+}
+
+static inline void reg_set_urange64(struct bpf_reg_state *reg, u64 umin, u64 umax)
+{
+ reg->r