diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2026-03-16 11:26:42 -0700 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2026-03-16 11:26:42 -0700 |
| commit | 6c8e1a9eee0fec802b542dadf768c30c2a183b3c (patch) | |
| tree | 7eea18c945bea1fd31c54818de9794ced6b767d5 /kernel | |
| parent | 202e42e4aa890172366354b233c42c73107a3f59 (diff) | |
| parent | 01d5d2f7d93de7270f0bf3bcba36f6f4d3d0bf9d (diff) | |
Merge branch 'bpf-relax-8-frame-limitation-for-global-subprogs'
Emil Tsalapatis says:
====================
bpf: Relax 8 frame limitation for global subprogs
The BPF verifier currently limits the maximum runtime call stack to
8 frames. Larger BPF programs like sched-ext schedulers routinely
fail verification because they exceed this limit, even as they use
very little actual stack space for each frame.
Relax the verifier to permit call stacks > 8 frames deep when the
call stacks include global subprogs. The old 8 stack frame limit now
only applies to call stacks composed entirely of static function calls.
This works because global functions are each verified in isolation, so
the verifier does not need to cross-reference verification state across
the function call boundary, which has been the reason for limiting the
call stack size in the first place.
This patch does not change the verification time limit of 8 stack
frames. Static functions that are inlined for verification purposes
still only go 8 frames deep to avoid changing the verifier's internal
data structures used for verification. These data structures only
support holding information on up to 8 stack frames.
This patch also does not adjust the actual maximum stack size of 512.
CHANGELOG
=========
v5 -> v6 (https://lore.kernel.org/bpf/20260311182831.91219-1-emil@etsalapatis.com/)
- Make bpf_subprog_call_depth_info internal to verifier.c (Alexei)
v4 -> v5 (https://lore.kernel.org/bpf/20260309204430.201219-1-emil@etsalapatis.com/)
- Move depth tracking state to verifier (Eduard) and free it after verification (Alexei)
- Fix selftest patch title and formatting errors (Yonghong)
v3 -> v4 (https://lore.kernel.org/bpf/20260303043106.406099-1-emil@etsalapatis.com/)
- Factor out temp call depth tracking info into its own struct (Eduard)
- Bring depth calculation loop in line with the other instances (Mykyta)
- Add comment on why selftest call stack is 16 bytes/frame (Eduard)
- Rename "cidx" to "caller" for clarity (Mykyta, Eduard)
v2 -> v3 (https://lore.kernel.org/bpf/20260210213606.475415-1-emil@etsalapatis.com/)
- Change logic to remove arbitrary limit on call depth (Eduard)
- Add additional selftests (Eduard)
v1 -> v2 (https://lore.kernel.org/bpf/20260202233716.835638-1-emil@etsalapatis.com)
- Adjust patch to only increase the runtime stack depth, leaving the
verification-time stack depth unchanged (Alexei)
Signed-off-by: Emil Tsalapatis <emil@etsalapatis.com>
====================
Link: https://patch.msgid.link/20260316161225.128011-1-emil@etsalapatis.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/verifier.c | 76 |
1 files changed, 54 insertions, 22 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e29f15419fcb..01c18f4268de 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6724,22 +6724,30 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth) return round_up(max_t(u32, stack_depth, 1), 32); } +/* temporary state used for call frame depth calculation */ +struct bpf_subprog_call_depth_info { + int ret_insn; /* caller instruction where we return to. */ + int caller; /* caller subprogram idx */ + int frame; /* # of consecutive static call stack frames on top of stack */ +}; + /* starting from main bpf function walk all instructions of the function * and recursively walk all callees that given function can call. * Ignore jump and exit insns. - * Since recursion is prevented by check_cfg() this algorithm - * only needs a local stack of MAX_CALL_FRAMES to remember callsites */ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, + struct bpf_subprog_call_depth_info *dinfo, bool priv_stack_supported) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn = env->prog->insnsi; int depth = 0, frame = 0, i, subprog_end, subprog_depth; bool tail_call_reachable = false; - int ret_insn[MAX_CALL_FRAMES]; - int ret_prog[MAX_CALL_FRAMES]; - int j; + int total; + int tmp; + + /* no caller idx */ + dinfo[idx].caller = -1; i = subprog[idx].start; if (!priv_stack_supported) @@ -6791,8 +6799,12 @@ process_func: } else { depth += subprog_depth; if (depth > MAX_BPF_STACK) { + total = 0; + for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) + total++; + verbose(env, "combined stack size of %d calls is %d. Too large\n", - frame + 1, depth); + total, depth); return -EACCES; } } @@ -6806,10 +6818,8 @@ continue_func: if (!is_bpf_throw_kfunc(insn + i)) continue; - if (subprog[idx].is_cb) - err = true; - for (int c = 0; c < frame && !err; c++) { - if (subprog[ret_prog[c]].is_cb) { + for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) { + if (subprog[tmp].is_cb) { err = true; break; } @@ -6825,8 +6835,6 @@ continue_func: if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ - ret_insn[frame] = i + 1; - ret_prog[frame] = idx; /* find the callee */ next_insn = i + insn[i].imm + 1; @@ -6846,7 +6854,16 @@ continue_func: return -EINVAL; } } + + /* store caller info for after we return from callee */ + dinfo[idx].frame = frame; + dinfo[idx].ret_insn = i + 1; + + /* push caller idx into callee's dinfo */ + dinfo[sidx].caller = idx; + i = next_insn; + idx = sidx; if (!priv_stack_supported) subprog[idx].priv_stack_mode = NO_PRIV_STACK; @@ -6854,7 +6871,7 @@ continue_func: if (subprog[idx].has_tail_call) tail_call_reachable = true; - frame++; + frame = subprog_is_global(env, idx) ? 0 : frame + 1; if (frame >= MAX_CALL_FRAMES) { verbose(env, "the call stack of %d frames is too deep !\n", frame); @@ -6868,12 +6885,12 @@ continue_func: * tail call counter throughout bpf2bpf calls combined with tailcalls */ if (tail_call_reachable) - for (j = 0; j < frame; j++) { - if (subprog[ret_prog[j]].is_exception_cb) { + for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) { + if (subprog[tmp].is_exception_cb) { verbose(env, "cannot tail call within exception cb\n"); return -EINVAL; } - subprog[ret_prog[j]].tail_call_reachable = true; + subprog[tmp].tail_call_reachable = true; } if (subprog[0].tail_call_reachable) env->prog->aux->tail_call_reachable = true; @@ -6881,23 +6898,33 @@ continue_func: /* end of for() loop means the last insn of the 'subprog' * was reached. Doesn't matter whether it was JA or EXIT */ - if (frame == 0) + if (frame == 0 && dinfo[idx].caller < 0) return 0; if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE) depth -= round_up_stack_depth(env, subprog[idx].stack_depth); - frame--; - i = ret_insn[frame]; - idx = ret_prog[frame]; + + /* pop caller idx from callee */ + idx = dinfo[idx].caller; + + /* retrieve caller state from its frame */ + frame = dinfo[idx].frame; + i = dinfo[idx].ret_insn; + goto continue_func; } static int check_max_stack_depth(struct bpf_verifier_env *env) { enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN; + struct bpf_subprog_call_depth_info *dinfo; struct bpf_subprog_info *si = env->subprog_info; bool priv_stack_supported; int ret; + dinfo = kvcalloc(env->subprog_cnt, sizeof(*dinfo), GFP_KERNEL_ACCOUNT); + if (!dinfo) + return -ENOMEM; + for (int i = 0; i < env->subprog_cnt; i++) { if (si[i].has_tail_call) { priv_stack_mode = NO_PRIV_STACK; @@ -6919,9 +6946,12 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) for (int i = env->subprog_cnt - 1; i >= 0; i--) { if (!i || si[i].is_async_cb) { priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE; - ret = check_max_stack_depth_subprog(env, i, priv_stack_supported); - if (ret < 0) + ret = check_max_stack_depth_subprog(env, i, dinfo, + priv_stack_supported); + if (ret < 0) { + kvfree(dinfo); return ret; + } } } @@ -6932,6 +6962,8 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) } } + kvfree(dinfo); + return 0; } |
