aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-04 13:38:03 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-04 13:38:03 -0700
commit0326074ff4652329f2a1a9c8685104576bd8d131 (patch)
tree9a7574c7ccb05bf4c7cb34fc5a65457bb8f495cb /kernel
parent522667b24f08009591c90e75bfe2ffb67f555498 (diff)
parent681bf011b9b5989c6e9db6beb64494918aab9a43 (diff)
Merge tag 'net-next-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - Introduce and use a single page frag cache for allocating small skb heads, clawing back the 10-20% performance regression in UDP flood test from previous fixes. - Run packets which already went thru HW coalescing thru SW GRO. This significantly improves TCP segment coalescing and simplifies deployments as different workloads benefit from HW or SW GRO. - Shrink the size of the base zero-copy send structure. - Move TCP init under a new slow / sleepable version of DO_ONCE(). BPF: - Add BPF-specific, any-context-safe memory allocator. - Add helpers/kfuncs for PKCS#7 signature verification from BPF programs. - Define a new map type and related helpers for user space -> kernel communication over a ring buffer (BPF_MAP_TYPE_USER_RINGBUF). - Allow targeting BPF iterators to loop through resources of one task/thread. - Add ability to call selected destructive functions. Expose crash_kexec() to allow BPF to trigger a kernel dump. Use CAP_SYS_BOOT check on the loading process to judge permissions. - Enable BPF to collect custom hierarchical cgroup stats efficiently by integrating with the rstat framework. - Support struct arguments for trampoline based programs. Only structs with size <= 16B and x86 are supported. - Invoke cgroup/connect{4,6} programs for unprivileged ICMP ping sockets (instead of just TCP and UDP sockets). - Add a helper for accessing CLOCK_TAI for time sensitive network related programs. - Support accessing network tunnel metadata's flags. - Make TCP SYN ACK RTO tunable by BPF programs with TCP Fast Open. - Add support for writing to Netfilter's nf_conn:mark. Protocols: - WiFi: more Extremely High Throughput (EHT) and Multi-Link Operation (MLO) work (802.11be, WiFi 7). - vsock: improve support for SO_RCVLOWAT. - SMC: support SO_REUSEPORT. - Netlink: define and document how to use netlink in a "modern" way. Support reporting missing attributes via extended ACK. - IPSec: support collect metadata mode for xfrm interfaces. - TCPv6: send consistent autoflowlabel in SYN_RECV state and RST packets. - TCP: introduce optional per-netns connection hash table to allow better isolation between namespaces (opt-in, at the cost of memory and cache pressure). - MPTCP: support TCP_FASTOPEN_CONNECT. - Add NEXT-C-SID support in Segment Routing (SRv6) End behavior. - Adjust IP_UNICAST_IF sockopt behavior for connected UDP sockets. - Open vSwitch: - Allow specifying ifindex of new interfaces. - Allow conntrack and metering in non-initial user namespace. - TLS: support the Korean ARIA-GCM crypto algorithm. - Remove DECnet support. Driver API: - Allow selecting the conduit interface used by each port in DSA switches, at runtime. - Ethernet Power Sourcing Equipment and Power Device support. - Add tc-taprio support for queueMaxSDU parameter, i.e. setting per traffic class max frame size for time-based packet schedules. - Support PHY rate matching - adapting between differing host-side and link-side speeds. - Introduce QUSGMII PHY mode and 1000BASE-KX interface mode. - Validate OF (device tree) nodes for DSA shared ports; make phylink-related properties mandatory on DSA and CPU ports. Enforcing more uniformity should allow transitioning to phylink. - Require that flash component name used during update matches one of the components for which version is reported by info_get(). - Remove "weight" argument from driver-facing NAPI API as much as possible. It's one of those magic knobs which seemed like a good idea at the time but is too indirect to use in practice. - Support offload of TLS connections with 256 bit keys. New hardware / drivers: - Ethernet: - Microchip KSZ9896 6-port Gigabit Ethernet Switch - Renesas Ethernet AVB (EtherAVB-IF) Gen4 SoCs - Analog Devices ADIN1110 and ADIN2111 industrial single pair Ethernet (10BASE-T1L) MAC+PHY. - Rockchip RV1126 Gigabit Ethernet (a version of stmmac IP). - Ethernet SFPs / modules: - RollBall / Hilink / Turris 10G copper SFPs - HALNy GPON module - WiFi: - CYW43439 SDIO chipset (brcmfmac) - CYW89459 PCIe chipset (brcmfmac) - BCM4378 on Apple platforms (brcmfmac) Drivers: - CAN: - gs_usb: HW timestamp support - Ethernet PHYs: - lan8814: cable diagnostics - Ethernet NICs: - Intel (100G): - implement control of FCS/CRC stripping - port splitting via devlink - L2TPv3 filtering offload - nVidia/Mellanox: - tunnel offload for sub-functions - MACSec offload, w/ Extended packet number and replay window offload - significantly restructure, and optimize the AF_XDP support, align the behavior with other vendors - Huawei: - configuring DSCP map for traffic class selection - querying standard FEC statistics - querying SerDes lane number via ethtool - Marvell/Cavium: - egress priority flow control - MACSec offload - AMD/SolarFlare: - PTP over IPv6 and raw Ethernet - small / embedded: - ax88772: convert to phylink (to support SFP cages) - altera: tse: convert to phylink - ftgmac100: support fixed link - enetc: standard Ethtool counters - macb: ZynqMP SGMII dynamic configuration support - tsnep: support multi-queue and use page pool - lan743x: Rx IP & TCP checksum offload - igc: add xdp frags support to ndo_xdp_xmit - Ethernet high-speed switches: - Marvell (prestera): - support SPAN port features (traffic mirroring) - nexthop object offloading - Microchip (sparx5): - multicast forwarding offload - QoS queuing offload (tc-mqprio, tc-tbf, tc-ets) - Ethernet embedded switches: - Marvell (mv88e6xxx): - support RGMII cmode - NXP (felix): - standardized ethtool counters - Microchip (lan966x): - QoS queuing offload (tc-mqprio, tc-tbf, tc-cbs, tc-ets) - traffic policing and mirroring - link aggregation / bonding offload - QUSGMII PHY mode support - Qualcomm 802.11ax WiFi (ath11k): - cold boot calibration support on WCN6750 - support to connect to a non-transmit MBSSID AP profile - enable remain-on-channel support on WCN6750 - Wake-on-WLAN support for WCN6750 - support to provide transmit power from firmware via nl80211 - support to get power save duration for each client - spectral scan support for 160 MHz - MediaTek WiFi (mt76): - WiFi-to-Ethernet bridging offload for MT7986 chips - RealTek WiFi (rtw89): - P2P support" * tag 'net-next-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1864 commits) eth: pse: add missing static inlines once: rename _SLOW to _SLEEPABLE net: pse-pd: add regulator based PSE driver dt-bindings: net: pse-dt: add bindings for regulator based PoDL PSE controller ethtool: add interface to interact with Ethernet Power Equipment net: mdiobus: search for PSE nodes by parsing PHY nodes. net: mdiobus: fwnode_mdiobus_register_phy() rework error handling net: add framework to support Ethernet PSE and PDs devices dt-bindings: net: phy: add PoDL PSE property net: marvell: prestera: Propagate nh state from hw to kernel net: marvell: prestera: Add neighbour cache accounting net: marvell: prestera: add stub handler neighbour events net: marvell: prestera: Add heplers to interact with fib_notifier_info net: marvell: prestera: Add length macros for prestera_ip_addr net: marvell: prestera: add delayed wq and flush wq on deinit net: marvell: prestera: Add strict cleanup of fib arbiter net: marvell: prestera: Add cleanup of allocated fib_nodes net: marvell: prestera: Add router nexthops ABI eth: octeon: fix build after netif_napi_add() changes net/mlx5: E-Switch, Return EBUSY if can't get mode lock ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/Makefile5
-rw-r--r--kernel/bpf/arraymap.c33
-rw-r--r--kernel/bpf/bpf_iter.c10
-rw-r--r--kernel/bpf/bpf_local_storage.c10
-rw-r--r--kernel/bpf/bpf_lsm.c23
-rw-r--r--kernel/bpf/bpf_task_storage.c8
-rw-r--r--kernel/bpf/btf.c287
-rw-r--r--kernel/bpf/cgroup.c185
-rw-r--r--kernel/bpf/cgroup_iter.c282
-rw-r--r--kernel/bpf/core.c10
-rw-r--r--kernel/bpf/cpumap.c6
-rw-r--r--kernel/bpf/devmap.c6
-rw-r--r--kernel/bpf/dispatcher.c27
-rw-r--r--kernel/bpf/hashtab.c206
-rw-r--r--kernel/bpf/helpers.c120
-rw-r--r--kernel/bpf/local_storage.c5
-rw-r--r--kernel/bpf/lpm_trie.c4
-rw-r--r--kernel/bpf/memalloc.c635
-rw-r--r--kernel/bpf/offload.c6
-rw-r--r--kernel/bpf/percpu_freelist.c48
-rw-r--r--kernel/bpf/queue_stack_maps.c2
-rw-r--r--kernel/bpf/ringbuf.c253
-rw-r--r--kernel/bpf/syscall.c46
-rw-r--r--kernel/bpf/task_iter.c224
-rw-r--r--kernel/bpf/trampoline.c68
-rw-r--r--kernel/bpf/verifier.c588
-rw-r--r--kernel/cgroup/cgroup.c5
-rw-r--r--kernel/cgroup/rstat.c48
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/taskstats.c1
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/bpf_trace.c211
-rw-r--r--kernel/trace/ftrace.c3
33 files changed, 2691 insertions, 686 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 057ba8e01e70..341c94f208f4 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
-obj-$(CONFIG_BPF_SYSCALL) += btf.o
+obj-$(CONFIG_BPF_SYSCALL) += btf.o memalloc.o
obj-$(CONFIG_BPF_JIT) += dispatcher.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
@@ -24,6 +24,9 @@ endif
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
endif
+ifeq ($(CONFIG_CGROUPS),y)
+obj-$(CONFIG_BPF_SYSCALL) += cgroup_iter.o
+endif
obj-$(CONFIG_CGROUP_BPF) += cgroup.o
ifeq ($(CONFIG_INET),y)
obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 624527401d4d..832b2659e96e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -279,7 +279,8 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
+ copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, value + off);
off += size;
}
rcu_read_unlock();
@@ -338,8 +339,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
return -EINVAL;
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
- memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
- value, map->value_size);
+ val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
+ copy_map_value(map, val, value);
+ check_and_free_fields(array, val);
} else {
val = array->value +
(u64)array->elem_size * (index & array->index_mask);
@@ -383,7 +385,8 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
+ copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
+ check_and_free_fields(array, per_cpu_ptr(pptr, cpu));
off += size;
}
rcu_read_unlock();
@@ -421,8 +424,20 @@ static void array_map_free(struct bpf_map *map)
int i;
if (map_value_has_kptrs(map)) {
- for (i = 0; i < array->map.max_entries; i++)
- bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
+ if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+ for (i = 0; i < array->map.max_entries; i++) {
+ void __percpu *pptr = array->pptrs[i & array->index_mask];
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ bpf_map_free_kptrs(map, per_cpu_ptr(pptr, cpu));
+ cond_resched();
+ }
+ }
+ } else {
+ for (i = 0; i < array->map.max_entries; i++)
+ bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
+ }
bpf_map_free_kptr_off_tab(map);
}
@@ -608,9 +623,9 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
pptr = v;
size = array->elem_size;
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(info->percpu_value_buf + off,
- per_cpu_ptr(pptr, cpu),
- size);
+ copy_map_value_long(map, info->percpu_value_buf + off,
+ per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, info->percpu_value_buf + off);
off += size;
}
ctx.value = info->percpu_value_buf;
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 24b755eca0b3..5dc307bdeaeb 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -202,6 +202,11 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
}
stop:
offs = seq->count;
+ if (IS_ERR(p)) {
+ seq->op->stop(seq, NULL);
+ err = PTR_ERR(p);
+ goto done;
+ }
/* bpf program called if !p */
seq->op->stop(seq, p);
if (!p) {
@@ -689,19 +694,24 @@ struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
+ struct bpf_run_ctx run_ctx, *old_run_ctx;
int ret;
if (prog->aux->sleepable) {
rcu_read_lock_trace();
migrate_disable();
might_fault();
+ old_run_ctx = bpf_set_run_ctx(&run_ctx);
ret = bpf_prog_run(prog, ctx);
+ bpf_reset_run_ctx(old_run_ctx);
migrate_enable();
rcu_read_unlock_trace();
} else {
rcu_read_lock();
migrate_disable();
+ old_run_ctx = bpf_set_run_ctx(&run_ctx);
ret = bpf_prog_run(prog, ctx);
+ bpf_reset_run_ctx(old_run_ctx);
migrate_enable();
rcu_read_unlock();
}
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 8ce40fd869f6..802fc15b0d73 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -555,11 +555,11 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem, map_node))) {
if (busy_counter) {
migrate_disable();
- __this_cpu_inc(*busy_counter);
+ this_cpu_inc(*busy_counter);
}
bpf_selem_unlink(selem, false);
if (busy_counter) {
- __this_cpu_dec(*busy_counter);
+ this_cpu_dec(*busy_counter);
migrate_enable();
}
cond_resched_rcu();
@@ -582,7 +582,7 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
synchronize_rcu();
kvfree(smap->buckets);
- kfree(smap);
+ bpf_map_area_free(smap);
}
int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
@@ -610,7 +610,7 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
unsigned int i;
u32 nbuckets;
- smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
+ smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
if (!smap)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
@@ -623,7 +623,7 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!smap->buckets) {
- kfree(smap);
+ bpf_map_area_free(smap);
return ERR_PTR(-ENOMEM);
}
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 761998fda762..d6c9b3705f24 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -41,17 +41,21 @@ BTF_SET_END(bpf_lsm_hooks)
*/
BTF_SET_START(bpf_lsm_current_hooks)
/* operate on freshly allocated sk without any cgroup association */
+#ifdef CONFIG_SECURITY_NETWORK
BTF_ID(func, bpf_lsm_sk_alloc_security)
BTF_ID(func, bpf_lsm_sk_free_security)
+#endif
BTF_SET_END(bpf_lsm_current_hooks)
/* List of LSM hooks that trigger while the socket is properly locked.
*/
BTF_SET_START(bpf_lsm_locked_sockopt_hooks)
+#ifdef CONFIG_SECURITY_NETWORK
BTF_ID(func, bpf_lsm_socket_sock_rcv_skb)
BTF_ID(func, bpf_lsm_sock_graft)
BTF_ID(func, bpf_lsm_inet_csk_clone)
BTF_ID(func, bpf_lsm_inet_conn_established)
+#endif
BTF_SET_END(bpf_lsm_locked_sockopt_hooks)
/* List of LSM hooks that trigger while the socket is _not_ locked,
@@ -59,8 +63,10 @@ BTF_SET_END(bpf_lsm_locked_sockopt_hooks)
* in the early init phase.
*/
BTF_SET_START(bpf_lsm_unlocked_sockopt_hooks)
+#ifdef CONFIG_SECURITY_NETWORK
BTF_ID(func, bpf_lsm_socket_post_create)
BTF_ID(func, bpf_lsm_socket_socketpair)
+#endif
BTF_SET_END(bpf_lsm_unlocked_sockopt_hooks)
#ifdef CONFIG_CGROUP_BPF
@@ -189,6 +195,14 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto = {
static const struct bpf_func_proto *
bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
+ const struct bpf_func_proto *func_proto;
+
+ if (prog->expected_attach_type == BPF_LSM_CGROUP) {
+ func_proto = cgroup_common_func_proto(func_id, prog);
+ if (func_proto)
+ return func_proto;
+ }
+
switch (func_id) {
case BPF_FUNC_inode_storage_get:
return &bpf_inode_storage_get_proto;
@@ -212,15 +226,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
case BPF_FUNC_get_attach_cookie:
return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
- case BPF_FUNC_get_local_storage:
- return prog->expected_attach_type == BPF_LSM_CGROUP ?
- &bpf_get_local_storage_proto : NULL;
- case BPF_FUNC_set_retval:
- return prog->expected_attach_type == BPF_LSM_CGROUP ?
- &bpf_set_retval_proto : NULL;
- case BPF_FUNC_get_retval:
- return prog->expected_attach_type == BPF_LSM_CGROUP ?
- &bpf_get_retval_proto : NULL;
#ifdef CONFIG_NET
case BPF_FUNC_setsockopt:
if (prog->expected_attach_type != BPF_LSM_CGROUP)
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index e9014dc62682..6f290623347e 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -26,20 +26,20 @@ static DEFINE_PER_CPU(int, bpf_task_storage_busy);
static void bpf_task_storage_lock(void)
{
migrate_disable();
- __this_cpu_inc(bpf_task_storage_busy);
+ this_cpu_inc(bpf_task_storage_busy);
}
static void bpf_task_storage_unlock(void)
{
- __this_cpu_dec(bpf_task_storage_busy);
+ this_cpu_dec(bpf_task_storage_busy);
migrate_enable();
}
static bool bpf_task_storage_trylock(void)
{
migrate_disable();
- if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
- __this_cpu_dec(bpf_task_storage_busy);
+ if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
+ this_cpu_dec(bpf_task_storage_busy);
migrate_enable();
return false;
}
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 7e64447659f3..eba603cec2c5 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -208,7 +208,7 @@ enum btf_kfunc_hook {
};
enum {
- BTF_KFUNC_SET_MAX_CNT = 32,
+ BTF_KFUNC_SET_MAX_CNT = 256,
BTF_DTOR_KFUNC_MAX_CNT = 256,
};
@@ -818,6 +818,7 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
return NULL;
return btf->types[type_id];
}
+EXPORT_SYMBOL_GPL(btf_type_by_id);
/*
* Regular int is not a bit field and it must be either
@@ -1396,7 +1397,6 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
const char *fmt, ...)
{
struct bpf_verifier_log *log = &env->log;
- u8 kind = BTF_INFO_KIND(t->info);
struct btf *btf = env->btf;
va_list args;
@@ -1412,7 +1412,7 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
__btf_verifier_log(log, "[%u] %s %s%s",
env->log_type_id,
- btf_kind_str[kind],
+ btf_type_str(t),
__btf_name_by_offset(btf, t->name_off),
log_details ? " " : "");
@@ -3128,7 +3128,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env,
if (v->next_member) {
const struct btf_type *last_member_type;
const struct btf_member *last_member;
- u16 last_member_type_id;
+ u32 last_member_type_id;
last_member = btf_type_member(v->t) + v->next_member - 1;
last_member_type_id = last_member->type;
@@ -4854,7 +4854,6 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
u32 hdr_len, hdr_copy, btf_data_size;
const struct btf_header *hdr;
struct btf *btf;
- int err;
btf = env->btf;
btf_data_size = btf->data_size;
@@ -4911,11 +4910,7 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
return -EINVAL;
}
- err = btf_check_sec_info(env, btf_data_size);
- if (err)
- return err;
-
- return 0;
+ return btf_check_sec_info(env, btf_data_size);
}
static int btf_check_type_tags(struct btf_verifier_env *env,
@@ -5328,6 +5323,34 @@ static bool is_int_ptr(struct btf *btf, const struct btf_type *t)
return btf_type_is_int(t);
}
+static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto,
+ int off)
+{
+ const struct btf_param *args;
+ const struct btf_type *t;
+ u32 offset = 0, nr_args;
+ int i;
+
+ if (!func_proto)
+ return off / 8;
+
+ nr_args = btf_type_vlen(func_proto);
+ args = (const struct btf_param *)(func_proto + 1);
+ for (i = 0; i < nr_args; i++) {
+ t = btf_type_skip_modifiers(btf, args[i].type, NULL);
+ offset += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8);
+ if (off < offset)
+ return i;
+ }
+
+ t = btf_type_skip_modifiers(btf, func_proto->type, NULL);
+ offset += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8);
+ if (off < offset)
+ return nr_args;
+
+ return nr_args + 1;
+}
+
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
@@ -5347,7 +5370,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
tname, off);
return false;
}
- arg = off / 8;
+ arg = get_ctx_arg_idx(btf, t, off);
args = (const struct btf_param *)(t + 1);
/* if (t == NULL) Fall back to default BPF prog with
* MAX_BPF_FUNC_REG_ARGS u64 arguments.
@@ -5398,7 +5421,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (!btf_type_is_small_int(t)) {
bpf_log(log,
"ret type %s not allowed for fmod_ret\n",
- btf_kind_str[BTF_INFO_KIND(t->info)]);
+ btf_type_str(t));
return false;
}
break;
@@ -5417,7 +5440,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* skip modifiers */
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_small_int(t) || btf_is_any_enum(t))
+ if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
/* accessing a scalar */
return true;
if (!btf_type_is_ptr(t)) {
@@ -5425,7 +5448,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
"func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
tname, arg,
__btf_name_by_offset(btf, t->name_off),
- btf_kind_str[BTF_INFO_KIND(t->info)]);
+ btf_type_str(t));
return false;
}
@@ -5509,11 +5532,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (!btf_type_is_struct(t)) {
bpf_log(log,
"func '%s' arg%d type %s is not a struct\n",
- tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ tname, arg, btf_type_str(t));
return false;
}
bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
- tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)],
+ tname, arg, info->btf_id, btf_type_str(t),
__btf_name_by_offset(btf, t->name_off));
return true;
}
@@ -5864,26 +5887,25 @@ again:
}
static int __get_type_size(struct btf *btf, u32 btf_id,
- const struct btf_type **bad_type)
+ const struct btf_type **ret_type)
{
const struct btf_type *t;
+ *ret_type = btf_type_by_id(btf, 0);
if (!btf_id)
/* void */
return 0;
t = btf_type_by_id(btf, btf_id);
while (t && btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (!t) {
- *bad_type = btf_type_by_id(btf, 0);
+ if (!t)
return -EINVAL;
- }
+ *ret_type = t;
if (btf_type_is_ptr(t))
/* kernel size of pointer. Not BPF's size of pointer*/
return sizeof(void *);
- if (btf_type_is_int(t) || btf_is_any_enum(t))
+ if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
return t->size;
- *bad_type = t;
return -EINVAL;
}
@@ -5902,8 +5924,10 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
/* BTF function prototype doesn't match the verifier types.
* Fall back to MAX_BPF_FUNC_REG_ARGS u64 args.
*/
- for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
+ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
m->arg_size[i] = 8;
+ m->arg_flags[i] = 0;
+ }
m->ret_size = 8;
m->nr_args = MAX_BPF_FUNC_REG_ARGS;
return 0;
@@ -5917,10 +5941,10 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
return -EINVAL;
}
ret = __get_type_size(btf, func->type, &t);
- if (ret < 0) {
+ if (ret < 0 || __btf_type_is_struct(t)) {
bpf_log(log,
"The function %s return type %s is unsupported.\n",
- tname, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ tname, btf_type_str(t));
return -EINVAL;
}
m->ret_size = ret;
@@ -5933,10 +5957,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
return -EINVAL;
}
ret = __get_type_size(btf, args[i].type, &t);
- if (ret < 0) {
+
+ /* No support of struct argument size greater than 16 bytes */
+ if (ret < 0 || ret > 16) {
bpf_log(log,
"The function %s arg%d type %s is unsupported.\n",
- tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ tname, i, btf_type_str(t));
return -EINVAL;
}
if (ret == 0) {
@@ -5946,6 +5972,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
return -EINVAL;
}
m->arg_size[i] = ret;
+ m->arg_flags[i] = __btf_type_is_struct(t) ? BTF_FMODEL_STRUCT_ARG : 0;
}
m->nr_args = nargs;
return 0;
@@ -6167,14 +6194,41 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf,
return true;
}
+static bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
+ const struct btf_param *arg,
+ const struct bpf_reg_state *reg,
+ const char *name)
+{
+ int len, target_len = strlen(name);
+ const struct btf_type *t;
+ const char *param_name;
+
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+ return false;
+
+ param_name = btf_name_by_offset(btf, arg->name_off);
+ if (str_is_empty(param_name))
+ return false;
+ len = strlen(param_name);
+ if (len != target_len)
+ return false;
+ if (strcmp(param_name, name))
+ return false;
+
+ return true;
+}
+
static int btf_check_func_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
struct bpf_reg_state *regs,
bool ptr_to_mem_ok,
- u32 kfunc_flags)
+ struct bpf_kfunc_arg_meta *kfunc_meta,
+ bool processing_call)
{
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
- bool rel = false, kptr_get = false, trusted_arg = false;
+ bool rel = false, kptr_get = false, trusted_args = false;
+ bool sleepable = false;
struct bpf_verifier_log *log = &env->log;
u32 i, nargs, ref_id, ref_obj_id = 0;
bool is_kfunc = btf_is_kernel(btf);
@@ -6207,11 +6261,12 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
- if (is_kfunc) {
+ if (is_kfunc && kfunc_meta) {
/* Only kfunc can be release func */
- rel = kfunc_flags & KF_RELEASE;
- kptr_get = kfunc_flags & KF_KPTR_GET;
- trusted_arg = kfunc_flags & KF_TRUSTED_ARGS;
+ rel = kfunc_meta->flags & KF_RELEASE;
+ kptr_get = kfunc_meta->flags & KF_KPTR_GET;
+ trusted_args = kfunc_meta->flags & KF_TRUSTED_ARGS;
+ sleepable = kfunc_meta->flags & KF_SLEEPABLE;
}
/* check that BTF function arguments match actual types that the
@@ -6221,9 +6276,42 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
enum bpf_arg_type arg_type = ARG_DONTCARE;
u32 regno = i + 1;
struct bpf_reg_state *reg = &regs[regno];
+ bool obj_ptr = false;
t = btf_type_skip_modifiers(btf, args[i].type, NULL);
if (btf_type_is_scalar(t)) {
+ if (is_kfunc && kfunc_meta) {
+ bool is_buf_size = false;
+
+ /* check for any const scalar parameter of name "rdonly_buf_size"
+ * or "rdwr_buf_size"
+ */
+ if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg,
+ "rdonly_buf_size")) {
+ kfunc_meta->r0_rdonly = true;
+ is_buf_size = true;
+ } else if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg,
+ "rdwr_buf_size"))
+ is_buf_size = true;
+
+ if (is_buf_size) {
+ if (kfunc_meta->r0_size) {
+ bpf_log(log, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
+ return -EINVAL;
+ }
+
+ if (!tnum_is_const(reg->var_off)) {
+ bpf_log(log, "R%d is not a const\n", regno);
+ return -EINVAL;
+ }
+
+ kfunc_meta->r0_size = reg->var_off.value;
+ ret = mark_chain_precision(env, regno);
+ if (ret)
+ return ret;
+ }
+ }
+
if (reg->type == SCALAR_VALUE)
continue;
bpf_log(log, "R%d is not a scalar\n", regno);
@@ -6236,10 +6324,17 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
+ /* These register types have special constraints wrt ref_obj_id
+ * and offset checks. The rest of trusted args don't.
+ */
+ obj_ptr = reg->type == PTR_TO_CTX || reg->type == PTR_TO_BTF_ID ||
+ reg2btf_ids[base_type(reg->type)];
+
/* Check if argument must be a referenced pointer, args + i has
* been verified to be a pointer (after skipping modifiers).
+ * PTR_TO_CTX is ok without having non-zero ref_obj_id.
*/
- if (is_kfunc && trusted_arg && !reg->ref_obj_id) {
+ if (is_kfunc && trusted_args && (obj_ptr && reg->type != PTR_TO_CTX) && !reg->ref_obj_id) {
bpf_log(log, "R%d must be referenced\n", regno);
return -EINVAL;
}
@@ -6248,12 +6343,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,