aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2026-04-12 08:29:30 -0700
committerAlexei Starovoitov <ast@kernel.org>2026-04-12 12:35:54 -0700
commit449f08fa59dda5da40317b6976604b877c4ecd63 (patch)
tree49d04653e8df7a7832b1f0a155f065d8765ba7fc /kernel
parent2ec74a053611edf77259ecdef1d94be181dd1dd6 (diff)
bpf: Move fixup/post-processing logic from verifier.c into fixups.c
verifier.c is huge. Split fixup/post-processing logic that runs after the verifier accepted the program into fixups.c. Mechanical move. No functional changes. Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/r/20260412152936.54262-2-alexei.starovoitov@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/fixups.c2457
-rw-r--r--kernel/bpf/verifier.c2955
3 files changed, 2688 insertions, 2725 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index b8ae7b0988a4..7c1eeee87fda 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += fixups.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c
new file mode 100644
index 000000000000..67c9b28767e1
--- /dev/null
+++ b/kernel/bpf/fixups.c
@@ -0,0 +1,2457 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/filter.h>
+#include <linux/vmalloc.h>
+#include <linux/bsearch.h>
+#include <linux/sort.h>
+#include <linux/perf_event.h>
+#include <net/xdp.h>
+#include "disasm.h"
+
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
+static bool is_cmpxchg_insn(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_STX &&
+ BPF_MODE(insn->code) == BPF_ATOMIC &&
+ insn->imm == BPF_CMPXCHG;
+}
+
+/* Return the regno defined by the insn, or -1. */
+static int insn_def_regno(const struct bpf_insn *insn)
+{
+ switch (BPF_CLASS(insn->code)) {
+ case BPF_JMP:
+ case BPF_JMP32:
+ case BPF_ST:
+ return -1;
+ case BPF_STX:
+ if (BPF_MODE(insn->code) == BPF_ATOMIC ||
+ BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
+ if (insn->imm == BPF_CMPXCHG)
+ return BPF_REG_0;
+ else if (insn->imm == BPF_LOAD_ACQ)
+ return insn->dst_reg;
+ else if (insn->imm & BPF_FETCH)
+ return insn->src_reg;
+ }
+ return -1;
+ default:
+ return insn->dst_reg;
+ }
+}
+
+/* Return TRUE if INSN has defined any 32-bit value explicitly. */
+static bool insn_has_def32(struct bpf_insn *insn)
+{
+ int dst_reg = insn_def_regno(insn);
+
+ if (dst_reg == -1)
+ return false;
+
+ return !bpf_is_reg64(insn, dst_reg, NULL, DST_OP);
+}
+
+static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
+{
+ const struct bpf_kfunc_desc *d0 = a;
+ const struct bpf_kfunc_desc *d1 = b;
+
+ if (d0->imm != d1->imm)
+ return d0->imm < d1->imm ? -1 : 1;
+ if (d0->offset != d1->offset)
+ return d0->offset < d1->offset ? -1 : 1;
+ return 0;
+}
+
+const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+ const struct bpf_insn *insn)
+{
+ const struct bpf_kfunc_desc desc = {
+ .imm = insn->imm,
+ .offset = insn->off,
+ };
+ const struct bpf_kfunc_desc *res;
+ struct bpf_kfunc_desc_tab *tab;
+
+ tab = prog->aux->kfunc_tab;
+ res = bsearch(&desc, tab->descs, tab->nr_descs,
+ sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
+
+ return res ? &res->func_model : NULL;
+}
+
+static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
+{
+ unsigned long call_imm;
+
+ if (bpf_jit_supports_far_kfunc_call()) {
+ call_imm = desc->func_id;
+ } else {
+ call_imm = BPF_CALL_IMM(desc->addr);
+ /* Check whether the relative offset overflows desc->imm */
+ if ((unsigned long)(s32)call_imm != call_imm) {
+ verbose(env, "address of kernel func_id %u is out of range\n",
+ desc->func_id);
+ return -EINVAL;
+ }
+ }
+ desc->imm = call_imm;
+ return 0;
+}
+
+static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
+{
+ struct bpf_kfunc_desc_tab *tab;
+ int i, err;
+
+ tab = env->prog->aux->kfunc_tab;
+ if (!tab)
+ return 0;
+
+ for (i = 0; i < tab->nr_descs; i++) {
+ err = set_kfunc_desc_imm(env, &tab->descs[i]);
+ if (err)
+ return err;
+ }
+
+ sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+ kfunc_desc_cmp_by_imm_off, NULL);
+ return 0;
+}
+
+static int add_kfunc_in_insns(struct bpf_verifier_env *env,
+ struct bpf_insn *insn, int cnt)
+{
+ int i, ret;
+
+ for (i = 0; i < cnt; i++, insn++) {
+ if (bpf_pseudo_kfunc_call(insn)) {
+ ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+static int get_callee_stack_depth(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn, int idx)
+{
+ int start = idx + insn->imm + 1, subprog;
+
+ subprog = bpf_find_subprog(env, start);
+ if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
+ return -EFAULT;
+ return env->subprog_info[subprog].stack_depth;
+}
+#endif
+
+/* single env->prog->insni[off] instruction was replaced with the range
+ * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
+ * [0, off) and [off, end) to new locations, so the patched range stays zero
+ */
+static void adjust_insn_aux_data(struct bpf_verifier_env *env,
+ struct bpf_prog *new_prog, u32 off, u32 cnt)
+{
+ struct bpf_insn_aux_data *data = env->insn_aux_data;
+ struct bpf_insn *insn = new_prog->insnsi;
+ u32 old_seen = data[off].seen;
+ u32 prog_len;
+ int i;
+
+ /* aux info at OFF always needs adjustment, no matter fast path
+ * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
+ * original insn at old prog.
+ */
+ data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
+
+ if (cnt == 1)
+ return;
+ prog_len = new_prog->len;
+
+ memmove(data + off + cnt - 1, data + off,
+ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+ memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
+ for (i = off; i < off + cnt - 1; i++) {
+ /* Expand insni[off]'s seen count to the patched range. */
+ data[i].seen = old_seen;
+ data[i].zext_dst = insn_has_def32(insn + i);
+ }
+}
+
+static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
+{
+ int i;
+
+ if (len == 1)
+ return;
+ /* NOTE: fake 'exit' subprog should be updated as well. */
+ for (i = 0; i <= env->subprog_cnt; i++) {
+ if (env->subprog_info[i].start <= off)
+ continue;
+ env->subprog_info[i].start += len - 1;
+ }
+}
+
+static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
+{
+ int i;
+
+ if (len == 1)
+ return;
+
+ for (i = 0; i < env->insn_array_map_cnt; i++)
+ bpf_insn_array_adjust(env->insn_array_maps[i], off, len);
+}
+
+static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
+{
+ int i;
+
+ for (i = 0; i < env->insn_array_map_cnt; i++)
+ bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len);
+}
+
+static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
+{
+ struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
+ int i, sz = prog->aux->size_poke_tab;
+ struct bpf_jit_poke_descriptor *desc;
+
+ for (i = 0; i < sz; i++) {
+ desc = &tab[i];
+ if (desc->insn_idx <= off)
+ continue;
+ desc->insn_idx += len - 1;
+ }
+}
+
+static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
+ const struct bpf_insn *patch, u32 len)
+{
+ struct bpf_prog *new_prog;
+ struct bpf_insn_aux_data *new_data = NULL;
+
+ if (len > 1) {
+ new_data = vrealloc(env->insn_aux_data,
+ array_size(env->prog->len + len - 1,
+ sizeof(struct bpf_insn_aux_data)),
+ GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!new_data)
+ return NULL;
+
+ env->insn_aux_data = new_data;
+ }
+
+ new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
+ if (IS_ERR(new_prog)) {
+ if (PTR_ERR(new_prog) == -ERANGE)
+ verbose(env,
+ "insn %d cannot be patched due to 16-bit range\n",
+ env->insn_aux_data[off].orig_idx);
+ return NULL;
+ }
+ adjust_insn_aux_data(env, new_prog, off, len);
+ adjust_subprog_starts(env, off, len);
+ adjust_insn_arrays(env, off, len);
+ adjust_poke_descs(new_prog, off, len);
+ return new_prog;
+}
+
+/*
+ * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
+ * jump offset by 'delta'.
+ */
+static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
+{
+ struct bpf_insn *insn = prog->insnsi;
+ u32 insn_cnt = prog->len, i;
+ s32 imm;
+ s16 off;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ u8 code = insn->code;
+
+ if (tgt_idx <= i && i < tgt_idx + delta)
+ continue;
+
+ if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+ BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
+ continue;
+
+ if (insn->code == (BPF_JMP32 | BPF_JA)) {
+ if (i + 1 + insn->imm != tgt_idx)
+ continue;
+ if (check_add_overflow(insn->imm, delta, &imm))
+ return -ERANGE;
+ insn->imm = imm;
+ } else {
+ if (i + 1 + insn->off != tgt_idx)
+ continue;
+ if (check_add_overflow(insn->off, delta, &off))
+ return -ERANGE;
+ insn->off = off;
+ }
+ }
+ return 0;
+}
+
+static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
+ u32 off, u32 cnt)
+{
+ int i, j;
+
+ /* find first prog starting at or after off (first to remove) */
+ for (i = 0; i < env->subprog_cnt; i++)
+ if (env->subprog_info[i].start >= off)
+ break;
+ /* find first prog starting at or after off + cnt (first to stay) */
+ for (j = i; j < env->subprog_cnt; j++)
+ if (env->subprog_info[j].start >= off + cnt)
+ break;
+ /* if j doesn't start exactly at off + cnt, we are just removing
+ * the front of previous prog
+ */
+ if (env->subprog_info[j].start != off + cnt)
+ j--;
+
+ if (j > i) {
+ struct bpf_prog_aux *aux = env->prog->aux;
+ int move;
+
+ /* move fake 'exit' subprog as well */
+ move = env->subprog_cnt + 1 - j;
+
+ memmove(env->subprog_info + i,
+ env->subprog_info + j,
+ sizeof(*env->subprog_info) * move);
+ env->subprog_cnt -= j - i;
+
+ /* remove func_info */
+ if (aux->func_info) {
+ move = aux->func_info_cnt - j;
+
+ memmove(aux->func_info + i,
+ aux->func_info + j,
+ sizeof(*aux->func_info) * move);
+ aux->func_info_cnt -= j - i;
+ /* func_info->insn_off is set after all code rewrites,
+ * in adjust_btf_func() - no need to adjust
+ */
+ }
+ } else {
+ /* convert i from "first prog to remove" to "first to adjust" */
+ if (env->subprog_info[i].start == off)
+ i++;
+ }
+
+ /* update fake 'exit' subprog as well */
+ for (; i <= env->subprog_cnt; i++)
+ env->subprog_info[i].start -= cnt;
+
+ return 0;
+}
+
+static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
+ u32 cnt)
+{
+ struct bpf_prog *prog = env->prog;
+ u32 i, l_off, l_cnt, nr_linfo;
+ struct bpf_line_info *linfo;
+
+ nr_linfo = prog->aux->nr_linfo;
+ if (!nr_linfo)
+ return 0;
+
+ linfo = prog->aux->linfo;
+
+ /* find first line info to remove, count lines to be removed */
+ for (i = 0; i < nr_linfo; i++)
+ if (linfo[i].insn_off >= off)
+ break;
+
+ l_off = i;
+ l_cnt = 0;
+ for (; i < nr_linfo; i++)
+ if (linfo[i].insn_off < off + cnt)
+ l_cnt++;
+ else
+ break;
+
+ /* First live insn doesn't match first live linfo, it needs to "inherit"
+ * last removed linfo. prog is already modified, so prog->len == off
+ * means no live instructions after (tail of the program was removed).
+ */
+ if (prog->len != off && l_cnt &&
+ (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
+ l_cnt--;
+ linfo[--i].insn_off = off + cnt;
+ }
+
+ /* remove the line info which refer to the removed instructions */
+ if (l_cnt) {
+ memmove(linfo + l_off, linfo + i,
+ sizeof(*linfo) * (nr_linfo - i));
+
+ prog->aux->nr_linfo -= l_cnt;
+ nr_linfo = prog->aux->nr_linfo;
+ }
+
+ /* pull all linfo[i].insn_off >= off + cnt in by cnt */
+ for (i = l_off; i < nr_linfo; i++)
+ linfo[i].insn_off -= cnt;
+
+ /* fix up all subprogs (incl. 'exit') which start >= off */
+ for (i = 0; i <= env->subprog_cnt; i++)
+ if (env->subprog_info[i].linfo_idx > l_off) {
+ /* program may have started in the removed region but
+ * may not be fully removed
+ */
+ if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
+ env->subprog_info[i].linfo_idx -= l_cnt;
+ else
+ env->subprog_info[i].linfo_idx = l_off;
+ }
+
+ return 0;
+}
+
+/*
+ * Clean up dynamically allocated fields of aux data for instructions [start, ...]
+ */
+void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
+{
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+ struct bpf_insn *insns = env->prog->insnsi;
+ int end = start + len;
+ int i;
+
+ for (i = start; i < end; i++) {
+ if (aux_data[i].jt) {
+ kvfree(aux_data[i].jt);
+ aux_data[i].jt = NULL;
+ }
+
+ if (bpf_is_ldimm64(&insns[i]))
+ i++;
+ }
+}
+
+static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+ unsigned int orig_prog_len = env->prog->len;
+ int err;
+
+ if (bpf_prog_is_offloaded(env->prog->aux))
+ bpf_prog_offload_remove_insns(env, off, cnt);
+
+ /* Should be called before bpf_remove_insns, as it uses prog->insnsi */
+ bpf_clear_insn_aux_data(env, off, cnt);
+
+ err = bpf_remove_insns(env->prog, off, cnt);
+ if (err)
+ return err;
+
+ err = adjust_subprog_starts_after_remove(env, off, cnt);
+ if (err)
+ return err;
+
+ err = bpf_adj_linfo_after_remove(env, off, cnt);
+ if (err)
+ return err;
+
+ adjust_insn_arrays_after_remove(env, off, cnt);
+
+ memmove(aux_data + off, aux_data + off + cnt,
+ sizeof(*aux_data) * (orig_prog_len - off - cnt));
+
+ return 0;
+}
+
+static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
+
+bool bpf_insn_is_cond_jump(u8 code)
+{
+ u8 op;
+
+ op = BPF_OP(code);
+ if (BPF_CLASS(code) == BPF_JMP32)
+ return op != BPF_JA;
+
+ if (BPF_CLASS(code) != BPF_JMP)
+ return false;
+
+ return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
+}
+
+void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
+{
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+ struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+ struct bpf_insn *insn = env->prog->insnsi;
+ const int insn_cnt = env->prog->len;
+ int i;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (!bpf_insn_is_cond_jump(insn->code))
+ continue;
+
+ if (!aux_data[i + 1].seen)
+ ja.off = insn->off;
+ else if (!aux_data[i + 1 + insn->off].seen)
+ ja.off = 0;
+ else
+ continue;
+
+ if (bpf_prog_is_offloaded(env->prog->aux))
+ bpf_prog_offload_replace_insn(env, i, &ja);
+
+ memcpy(insn, &ja, sizeof(ja));
+ }
+}
+
+int bpf_opt_remove_dead_code(struct bpf_verifier_env *env)
+{
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+ int insn_cnt = env->prog->len;
+ int i, err;
+
+ for (i = 0; i < insn_cnt; i++) {
+ int j;
+
+ j = 0;
+ while (i + j < insn_cnt && !aux_data[i + j].seen)
+ j++;
+ if (!j)
+ continue;
+
+ err = verifier_remove_insns(env, i, j);
+ if (err)
+ return err;
+ insn_cnt = env->prog->len;
+ }
+
+ return 0;
+}
+
+int bpf_opt_remove_nops(struct bpf_verifier_env *env)
+{
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ bool is_may_goto_0, is_ja;
+ int i, err;
+
+ for (i = 0; i < insn_cnt; i++) {
+ is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
+ is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
+
+ if (!is_may_goto_0 && !is_ja)
+ continue;
+
+ err = verifier_remove_insns(env, i, 1);
+ if (err)
+ return err;
+ insn_cnt--;
+ /* Go back one insn to catch may_goto +1; may_goto +0 sequence */
+ i -= (is_may_goto_0 && i > 0) ? 2 : 1;
+ }
+
+ return 0;
+}
+
+int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
+ const union bpf_attr *attr)
+{
+ struct bpf_insn *patch;
+ /* use env->insn_buf as two independent buffers */
+ struct bpf_insn *zext_patch = env->insn_buf;
+ struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ int i, patch_len, delta = 0, len = env->prog->len;
+ struct bpf_insn *insns = env->prog->insnsi;
+ struct bpf_prog *new_prog;
+ bool rnd_hi32;
+
+ rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
+ zext_patch[1] = BPF_ZEXT_REG(0);
+ rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
+ rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
+ rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
+ for (i = 0; i < len; i++) {
+ int adj_idx = i + delta;
+ struct bpf_insn insn;
+ int load_reg;
+
+ insn = insns[adj_idx];
+ load_reg = insn_def_regno(&insn);
+ if (!aux[adj_idx].zext_dst) {
+ u8 code, class;
+ u32 imm_rnd;
+
+ if (!rnd_hi32)
+ continue;
+
+ code = insn.code;
+ class = BPF_CLASS(code);
+ if (load_reg == -1)
+ continue;
+
+ /* NOTE: arg "reg" (the fourth one) is only used for
+ * BPF_STX + SRC_OP, so it is safe to pass NULL
+ * here.
+ */
+ if (bpf_is_reg64(&insn, load_reg, NULL, DST_OP)) {
+ if (class == BPF_LD &&
+ BPF_MODE(code) == BPF_IMM)
+ i++;
+ continue;
+ }
+
+ /* ctx load could be transformed into wider load. */
+ if (class == BPF_LDX &&
+ aux[adj_idx].ptr_type == PTR_TO_CTX)
+ continue;
+
+ imm_rnd = get_random_u32();
+ rnd_hi32_patch[0] = insn;
+ rnd_hi32_patch[1].imm = imm_rnd;
+ rnd_hi32_patch[3].dst_reg = load_reg;
+ patch = rnd_hi32_patch;
+ patch_len = 4;
+ goto apply_patch_buffer;
+ }
+
+ /* Add in an zero-extend instruction if a) the JIT has requested
+ * it or b) it's a CMPXCHG.
+ *
+ * The latter is because: BPF_CMPXCHG always loads a value into
+ * R0, therefore always zero-extends. However some archs'
+ * equivalent instruction only does this load when the
+ * comparison is successful. This detail of CMPXCHG is
+ * orthogonal to the general zero-extension behaviour of the
+ * CPU, so it's treated independently of bpf_jit_needs_zext.
+ */
+ if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
+ continue;
+
+ /* Zero-extension is done by the caller. */
+ if (bpf_pseudo_kfunc_call(&insn))
+ continue;
+
+ if (verifier_bug_if(load_reg == -1, env,
+ "zext_dst is set, but no reg is defined"))
+ return -EFAULT;
+
+ zext_patch[0] = insn;
+ zext_patch[1].dst_reg = load_reg;
+ zext_patch[1].src_reg = load_reg;
+ patch = zext_patch;
+ patch_len = 2;
+apply_patch_buffer:
+ new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = new_prog;
+ insns = new_prog->insnsi;
+ aux = env->insn_aux_data;
+ delta += patch_len - 1;
+ }
+
+ return 0;
+}
+
+/* convert load instructions that access fields of a context type into a
+ * sequence of instructions that access fields of the underlying structure:
+ * struct __sk_buff -> struct sk_buff
+ * struct bpf_sock_ops -> struct sock
+ */
+int bpf_convert_ctx_accesses(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *subprogs = env->subprog_info;
+ const struct bpf_verifier_ops *ops = env->ops;
+ int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
+ const int insn_cnt = env->prog->len;
+ struct bpf_insn *epilogue_buf = env->epilogue_buf;
+ struct bpf_insn *insn_buf = env->insn_buf;
+ struct bpf_insn *insn;
+ u32 target_size, size_default, off;
+ struct bpf_prog *new_prog;
+ enum bpf_access_type type;
+ bool is_narrower_load;
+ int epilogue_idx = 0;
+
+ if (ops->gen_epilogue) {
+ epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
+ -(subprogs[0].stack_depth + 8));
+ if (epilogue_cnt >= INSN_BUF_SIZE) {
+ verifier_bug(env, "epilogue is too long");
+ return -EFAULT;
+ } else if (epilogue_cnt) {
+ /* Save the ARG_PTR_TO_CTX for the epilogue to use */
+ cnt = 0;
+ subprogs[0].stack_depth += 8;
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
+ -subprogs[0].stack_depth);
+ insn_buf[cnt++] = env->prog->insnsi[0];
+ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = new_prog;
+ delta += cnt - 1;
+
+ ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ if (ops->gen_prologue || env->seen_direct_write) {
+ if (!ops->gen_prologue) {
+ verifier_bug(env, "gen_prologue is null");
+ return -EFAULT;
+ }
+ cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
+ env->prog);
+ if (cnt >= INSN_BUF_SIZE) {
+ verifier_bug(env, "prologue is too long");
+ return -EFAULT;
+ } else if (cnt) {
+ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ env->prog = new_prog;
+ delta += cnt - 1;
+
+ ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ if (delta)
+ WARN_ON(adjust_jmp_off(env->prog, 0, delta));
+
+ if (bpf_prog_is_offloaded(env->prog->aux))
+ return 0;
+
+ insn = env->prog->insnsi + delta;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ bpf_convert_ctx_access_t convert_ctx_access;
+ u8 mode;
+
+ if (env->insn_aux_data[i + delta].nospec) {
+ WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
+ struct bpf_insn *patch = insn_buf;
+
+ *patch++ = BPF_ST_NOSPEC();
+ *patch++ = *insn;
+ cnt = patch - insn_buf;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ /* This can not be easily merged with the
+ * nospec_result-case, because an insn may require a
+ * nospec before and after itself. Therefore also do not
+ * 'continue' here but potentially apply further
+ * patching to insn. *insn should equal patch[1] now.
+ */
+ }
+
+ if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
+ insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
+ insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
+ insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
+ type = BPF_READ;
+ } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
+ type = BPF_WRITE;
+ } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
+ env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
+ insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
+ env->prog->aux->num_exentries++;
+ continue;
+ } else if (insn->code == (BPF_JMP | BPF_EXIT) &&
+ epilogue_cnt &&
+ i + delta < subprogs[1].start) {
+ /* Generate epilogue for the main prog */
+ if (epilogue_idx) {
+ /* jump back to the earlier generated epilogue */
+ insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
+ cnt = 1;
+ } else {
+ memcpy(insn_buf, epilogue_buf,
+ epilogue_cnt * sizeof(*epilogue_buf));
+ cnt = epilogue_cnt;
+ /* epilogue_idx cannot be 0. It must have at
+ * least one ctx ptr saving insn before the
+ * epilogue.
+ */
+ epilogue_idx = i + delta;
+ }
+ goto patch_insn_buf;
+ } else {
+ continue;
+ }
+
+ if (type == BPF_WRITE &&
+ env->insn_aux_data[i + delta].nospec_result) {
+ /* nospec_result is only used to mitigate Spectre v4 and
+ * to limit verification-time for Spectre v1.
+ */
+ struct bpf_insn *patch = insn_buf;
+
+ *patch++ = *insn;
+ *patch++ = BPF_ST_NOSPEC();
+ cnt = patch - insn_buf;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
+ switch ((int)env->insn_aux_data[i + delta].ptr_type) {
+ case PTR_TO_CTX:
+ if (!ops->convert_ctx_access)
+ continue;
+ convert_ctx_access = ops->convert_ctx_access;
+ break;
+ case PTR_TO_SOCKET:
+ case PTR_TO_SOCK_COMMON:
+ convert_ctx_access = bpf_sock_convert_ctx_access;
+ break;
+ case PTR_TO_TCP_SOCK:
+ convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
+ break;
+ case PTR_TO_XDP_SOCK:
+ convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
+ break;
+ case PTR_TO_BTF_ID:
+ case PTR_TO_BTF_ID | PTR_UNTRUSTED:
+ /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
+ * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
+ * be said once it is marked PTR_UNTRUSTED, hence we must handle
+ * any faults for loads into such types. BPF_WRITE is disallowed
+ * for this case.
+ */
+ case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
+ case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
+ if (type == BPF_READ) {
+ if (BPF_MODE(insn->code) == BPF_MEM)
+ insn->code = BPF_LDX | BPF_PROBE_MEM |
+ BPF_SIZE((insn)->code);
+ else
+ insn->code = BPF_LDX | BPF_PROBE_MEMSX |
+ BPF_SIZE((insn)->code);
+ env->prog->aux->num_exentries++;
+ }
+ continue;
+ case PTR_TO_ARENA:
+ if (BPF_MODE(insn->code) == BPF_MEMSX) {
+ if (!bpf_jit_supports_insn(insn, true)) {
+ verbose(env, "sign extending loads from arena are not supported yet\n");
+ return -EOPNOTSUPP;
+ }
+ insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
+ } else {
+ insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
+ }
+ env->prog->aux->num_exentries++;
+ continue;
+ default:
+ continue;
+ }
+
+ ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
+ size = BPF_LDST_BYTES(insn);
+ mode = BPF_MODE(insn->code);
+
+ /* If the read access is a narrower load of the field,
+ * convert to a 4/8-byte load, to minimum program type specific
+ * convert_ctx_access changes. If conversion is successful,
+ * we will apply proper mask to the result.
+ */
+ is_narrower_load = size < ctx_field_size;
+ size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
+ off = insn->off;
+ if (is_narrower_load) {
+ u8 size_code;
+
+ if (type == BPF_WRITE) {
+ verifier_bug(env, "narrow ctx access misconfigured");
+ return -EFAULT;
+ }
+
+ size_code = BPF_H;
+ if (ctx_field_size == 4)
+ size_code = BPF_W;
+ else if (ctx_field_size == 8)
+ size_code = BPF_DW;
+
+ insn->off = off & ~(size_default - 1);
+ insn->code = BPF_LDX | BPF_MEM | size_code;
+ }
+
+ target_size = 0;
+ cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
+ &target_size);
+ if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
+ (ctx_field_size && !target_size)) {
+ verifier_bug(env, "error during ctx access conversion (%d)", cnt);
+ return -EFAULT;
+ }
+
+ if (is_narrower_load && size < target_size) {
+ u8 shift = bpf_ctx_narrow_access_offset(
+ off, size, size_default) * 8;
+ if (shift && cnt + 1 >= INSN_BUF_SIZE) {
+ verifier_bug(env, "narrow ctx load misconfigured");
+ return -EFAULT;
+ }
+ if (ctx_field_size <= 4) {
+ if (shift)
+ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
+ insn->dst_reg,
+ shift);
+ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
+ (1 << size * 8) - 1);
+ } else {
+ if (shift)
+ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
+ insn->dst_reg,
+ shift);
+ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
+ (1ULL << size * 8) - 1);
+ }
+ }
+ if (mode == BPF_MEMSX)
+ insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
+ insn->dst_reg, insn->dst_reg,
+ size * 8, 0);
+
+patch_insn_buf:
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+
+ /* keep walking new program and skip insns we just inserted */
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ }
+
+ return 0;
+}
+
+int bpf_jit_subprogs(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog, **func, *tmp;
+ int i, j, subprog_start, subprog_end = 0, len, subprog;
+ struct bpf_map *map_ptr;
+ struct bpf_insn *insn;
+ void *old_bpf_func;
+ int err, num_exentries;
+ int old_len, subprog_start_adjustment = 0;
+
+ if (env->subprog_cnt <= 1)
+ return 0;
+
+ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
+ continue;
+
+ /* Upon error here we cannot fall back to interpreter but
+ * need a hard reject of the program. Thus -EFAULT is
+ * propagated in any case.
+ */
+ subprog = bpf_find_subprog(env, i + insn->imm + 1);
+ if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
+ i + insn->imm + 1))
+ return -EFAULT;
+ /* temporarily remember subprog id inside insn instead of
+ * aux_data, since next loop will split up all insns into funcs
+ */
+ insn->off = subprog;
+ /* remember original imm in case JIT fails and fallback
+ * to interpreter will be needed
+ */
+ env->insn_aux_data[i].call_imm = insn->imm;
+ /* point imm to __bpf_call_base+1 from JITs point of view */
+ insn->imm = 1;
+ if (bpf_pseudo_func(insn)) {
+#if defined(MODULES_VADDR)
+ u64 addr = MODULES_VADDR;
+#else
+ u64 addr = VMALLOC_START;
+#endif
+ /* jit (e.g. x86_64) may emit fewer instructions
+ * if it learns a u32 imm is the same as a u64 imm.
+ * Set close enough to possible prog address.
+ */
+ insn[0].imm = (u32)addr;
+ insn[1].imm = addr >> 32;
+ }
+ }
+
+ err = bpf_prog_alloc_jited_linfo(prog);
+ if (err)
+ goto out_undo_insn;
+
+ err = -ENOMEM;
+ func = kzalloc_objs(prog, env->subprog_cnt);
+ if (!func)
+ goto out_undo_insn;
+
+ for (i = 0; i < env->subprog_cnt; i++) {
+ subprog_start = subprog_end;
+ subprog_end = env->subprog_info[i + 1].start;
+
+ len = subprog_end - subprog_start;
+ /* bpf_prog_run() doesn't call subprogs directly,
+ * hence main prog stats include the runtime of subprogs.
+ * subprogs don't have IDs and not reachable via prog_get_next_id
+ * func[i]->stats will never be accessed and stays NULL
+ */
+ func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
+ if (!func[i])
+ goto out_free;
+ memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
+ len * sizeof(struct bpf_insn));
+ func[i]->type = prog->type;
+ func[i]->len = len;
+ if (bpf_prog_calc_tag(func[i]))
+ goto out_free;
+ func[i]->is_func = 1;
+ func[i]->sleepable = prog->sleepable;
+ func[i]->aux->func_idx = i;
+ /* Below members will be freed only at prog->aux */
+ func[i]->aux->btf = prog->aux->btf;
+ func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment;
+ func[i]->aux->func_info = prog->aux->func_info;
+ func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
+ func[i]->aux->poke_tab = prog->aux->poke_tab;
+ func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
+ func[i]->aux->main_prog_aux = prog->aux;
+
+ for (j = 0; j < pr