From 7711f4bb4b360d9c0ff84db1c0ec91e385625047 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Dec 2025 12:20:35 +0100 Subject: netfilter: nft_set_pipapo: fix range overlap detection set->klen has to be used, not sizeof(). The latter only compares a single register but a full check of the entire key is needed. Example: table ip t { map s { typeof iifname . ip saddr : verdict flags interval } } nft add element t s '{ "lo" . 10.0.0.0/24 : drop }' # no error, expected nft add element t s '{ "lo" . 10.0.0.0/24 : drop }' # no error, expected nft add element t s '{ "lo" . 10.0.0.0/8 : drop }' # bug: no error The 3rd 'add element' should be rejected via -ENOTEMPTY, not -EEXIST, so userspace / nft can report an error to the user. The latter is only correct for the 2nd case (re-add of existing element). As-is, userspace is told that the command was successful, but no elements were added. After this patch, 3rd command gives: Error: Could not process rule: File exists add element t s { "lo" . 127.0.0.0/8 . "lo" : drop } ^^^^^^^^^^^^^^^^^^^^^^^^^ Fixes: 0eb4b5ee33f2 ("netfilter: nft_set_pipapo: Separate partial and complete overlap cases on insertion") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 112fe46788b6..6d77a5f0088a 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1317,8 +1317,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else dup_end = dup_key; - if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) && - !memcmp(end, dup_end->data, sizeof(*dup_end->data))) { + if (!memcmp(start, dup_key->data, set->klen) && + !memcmp(end, dup_end->data, set->klen)) { *elem_priv = &dup->priv; return -EEXIST; } -- cgit v1.2.3 From a675d1caa2041f05f6343fad67b04f8babf32217 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Dec 2025 12:20:36 +0100 Subject: selftests: netfilter: nft_concat_range.sh: add check for overlap detection bug without 'netfilter: nft_set_pipapo: fix range overlap detection': reject overlapping range on add 0s [FAIL] Returned success for add { 1.2.3.4 . 1.2.4.1-1.2.4.2 } given set: table inet filter { [..] elements = { 1.2.3.4 . 1.2.4.1 counter packets 0 bytes 0, 1.2.3.0-1.2.3.4 . 1.2.4.2 counter packets 0 bytes 0 } } The element collides with existing ones and was not added, but kernel returned success to userspace. Signed-off-by: Florian Westphal --- .../selftests/net/netfilter/nft_concat_range.sh | 45 +++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index ad97c6227f35..394166f224a4 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -29,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -420,6 +420,18 @@ race_repeat 0 perf_duration 0 " +TYPE_insert_overlap=" +display reject overlapping range on add +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1954,6 +1966,37 @@ EOF return 0 } +add_fail() +{ + if nft add element inet filter test "$1" 2>/dev/null ; then + err "Returned success for add ${1} given set:" + err "$(nft -a list set inet filter test )" + return 1 + fi + + return 0 +} + +test_bug_insert_overlap() +{ + local elements="1.2.3.4 . 1.2.4.1" + + setup veth send_"${proto}" set || return ${ksft_skip} + + add "{ $elements }" || return 1 + + elements="1.2.3.0-1.2.3.4 . 1.2.4.1" + add_fail "{ $elements }" || return 1 + + elements="1.2.3.0-1.2.3.4 . 1.2.4.2" + add "{ $elements }" || return 1 + + elements="1.2.3.4 . 1.2.4.1-1.2.4.2" + add_fail "{ $elements }" || return 1 + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" } -- cgit v1.2.3 From 36a3200575642846a96436d503d46544533bb943 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 17 Dec 2025 21:21:59 +0100 Subject: netfilter: nft_synproxy: avoid possible data-race on update operation During nft_synproxy eval we are reading nf_synproxy_info struct which can be modified on update operation concurrently. As nf_synproxy_info struct fits in 32 bits, use READ_ONCE/WRITE_ONCE annotations. Fixes: ee394f96ad75 ("netfilter: nft_synproxy: add synproxy stateful object support") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- net/netfilter/nft_synproxy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 5d3e51825985..4d3e5a31b412 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -48,7 +48,7 @@ static void nft_synproxy_eval_v4(const struct nft_synproxy *priv, struct tcphdr *_tcph, struct synproxy_options *opts) { - struct nf_synproxy_info info = priv->info; + struct nf_synproxy_info info = READ_ONCE(priv->info); struct net *net = nft_net(pkt); struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *skb = pkt->skb; @@ -79,7 +79,7 @@ static void nft_synproxy_eval_v6(const struct nft_synproxy *priv, struct tcphdr *_tcph, struct synproxy_options *opts) { - struct nf_synproxy_info info = priv->info; + struct nf_synproxy_info info = READ_ONCE(priv->info); struct net *net = nft_net(pkt); struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *skb = pkt->skb; @@ -340,7 +340,7 @@ static void nft_synproxy_obj_update(struct nft_object *obj, struct nft_synproxy *newpriv = nft_obj_data(newobj); struct nft_synproxy *priv = nft_obj_data(obj); - priv->info = newpriv->info; + WRITE_ONCE(priv->info, newpriv->info); } static struct nft_object_type nft_synproxy_obj_type; -- cgit v1.2.3 From 2bafeb8d2f380c3a81d98bd7b78b854b564f9cd4 Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Fri, 19 Dec 2025 06:13:20 +0100 Subject: netfilter: replace -EEXIST with -EBUSY The -EEXIST error code is reserved by the module loading infrastructure to indicate that a module is already loaded. When a module's init function returns -EEXIST, userspace tools like kmod interpret this as "module already loaded" and treat the operation as successful, returning 0 to the user even though the module initialization actually failed. Replace -EEXIST with -EBUSY to ensure correct error reporting in the module initialization path. Affected modules: * ebtable_broute ebtable_filter ebtable_nat arptable_filter * ip6table_filter ip6table_mangle ip6table_nat ip6table_raw * ip6table_security iptable_filter iptable_mangle iptable_nat * iptable_raw iptable_security Signed-off-by: Daniel Gomez Signed-off-by: Florian Westphal --- net/bridge/netfilter/ebtables.c | 2 +- net/netfilter/nf_log.c | 4 ++-- net/netfilter/x_tables.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5697e3949a36..a04fc1757528 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1299,7 +1299,7 @@ int ebt_register_template(const struct ebt_table *t, int (*table_init)(struct ne list_for_each_entry(tmpl, &template_tables, list) { if (WARN_ON_ONCE(strcmp(t->name, tmpl->name) == 0)) { mutex_unlock(&ebt_mutex); - return -EEXIST; + return -EBUSY; } } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 74cef8bf554c..62cf6a30875e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -89,7 +89,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) if (pf == NFPROTO_UNSPEC) { for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { if (rcu_access_pointer(loggers[i][logger->type])) { - ret = -EEXIST; + ret = -EBUSY; goto unlock; } } @@ -97,7 +97,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) rcu_assign_pointer(loggers[i][logger->type], logger); } else { if (rcu_access_pointer(loggers[pf][logger->type])) { - ret = -EEXIST; + ret = -EBUSY; goto unlock; } rcu_assign_pointer(loggers[pf][logger->type], logger); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 90b7630421c4..48105ea3df15 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1764,7 +1764,7 @@ EXPORT_SYMBOL_GPL(xt_hook_ops_alloc); int xt_register_template(const struct xt_table *table, int (*table_init)(struct net *net)) { - int ret = -EEXIST, af = table->af; + int ret = -EBUSY, af = table->af; struct xt_template *t; mutex_lock(&xt[af].mutex); -- cgit v1.2.3 From d077e8119ddbb4fca67540f1a52453631a47f221 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Wed, 24 Dec 2025 12:48:26 +0000 Subject: netfilter: nf_tables: fix memory leak in nf_tables_newrule() In nf_tables_newrule(), if nft_use_inc() fails, the function jumps to the err_release_rule label without freeing the allocated flow, leading to a memory leak. Fix this by adding a new label err_destroy_flow and jumping to it when nft_use_inc() fails. This ensures that the flow is properly released in this error case. Fixes: 1689f25924ada ("netfilter: nf_tables: report use refcount overflow") Signed-off-by: Zilin Guan Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 618af6e90773..729a92781a1a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4439,7 +4439,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, if (!nft_use_inc(&chain->use)) { err = -EMFILE; - goto err_release_rule; + goto err_destroy_flow; } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { @@ -4489,6 +4489,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, err_destroy_flow_rule: nft_use_dec_restore(&chain->use); +err_destroy_flow: if (flow) nft_flow_rule_destroy(flow); err_release_rule: -- cgit v1.2.3 From 7811ba452402d58628e68faedf38745b3d485e3c Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 17 Dec 2025 15:46:40 +0100 Subject: netfilter: nf_conncount: update last_gc only when GC has been performed Currently last_gc is being updated everytime a new connection is tracked, that means that it is updated even if a GC wasn't performed. With a sufficiently high packet rate, it is possible to always bypass the GC, causing the list to grow infinitely. Update the last_gc value only when a GC has been actually performed. Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- net/netfilter/nf_conncount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 3654f1e8976c..8487808c8761 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -229,6 +229,7 @@ static int __nf_conncount_add(struct net *net, nf_ct_put(found_ct); } + list->last_gc = (u32)jiffies; add_new_node: if (WARN_ON_ONCE(list->count > INT_MAX)) { @@ -248,7 +249,6 @@ add_new_node: conn->jiffies32 = (u32)jiffies; list_add_tail(&conn->node, &list->head); list->count++; - list->last_gc = (u32)jiffies; out_put: if (refcounted) -- cgit v1.2.3 From a428e0da1248c353557970848994f35fd3f005e2 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Mon, 29 Dec 2025 21:21:18 -0800 Subject: net: marvell: prestera: fix NULL dereference on devlink_alloc() failure devlink_alloc() may return NULL on allocation failure, but prestera_devlink_alloc() unconditionally calls devlink_priv() on the returned pointer. This leads to a NULL pointer dereference if devlink allocation fails. Add a check for a NULL devlink pointer and return NULL early to avoid the crash. Fixes: 34dd1710f5a3 ("net: marvell: prestera: Add basic devlink support") Signed-off-by: Alok Tiwari Acked-by: Elad Nachman Link: https://patch.msgid.link/20251230052124.897012-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_devlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index 2a4c9df4eb79..e63d95c1842f 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -387,6 +387,8 @@ struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev) dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch), dev->dev); + if (!dl) + return NULL; return devlink_priv(dl); } -- cgit v1.2.3 From d7065436e8a04520f60c18240b775861be7999d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20Bl=C3=B6chl?= Date: Sun, 28 Dec 2025 16:52:59 +0100 Subject: net: bnge: add AUXILIARY_BUS to Kconfig dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The build can currently fail with ld: drivers/net/ethernet/broadcom/bnge/bnge_auxr.o: in function `bnge_rdma_aux_device_add': bnge_auxr.c:(.text+0x366): undefined reference to `__auxiliary_device_add' ld: drivers/net/ethernet/broadcom/bnge/bnge_auxr.o: in function `bnge_rdma_aux_device_init': bnge_auxr.c:(.text+0x43c): undefined reference to `auxiliary_device_init' if BNGE is enabled but no other driver pulls in AUXILIARY_BUS. Select AUXILIARY_BUS in BNGE like in all other drivers which create an auxiliary_device. Fixes: 8ac050ec3b1c ("bng_en: Add RoCE aux device support") Signed-off-by: Markus Blöchl Reviewed-by: Vikas Gupta Link: https://patch.msgid.link/20251228-bnge_aux_bus-v1-1-82e273ebfdac@blochl.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index ca565ace6e6a..cd7dddeb91dd 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -259,6 +259,7 @@ config BNGE depends on PCI select NET_DEVLINK select PAGE_POOL + select AUXILIARY_BUS help This driver supports Broadcom ThorUltra 50/100/200/400/800 gigabit Ethernet cards. The module will be called bng_en. To compile this -- cgit v1.2.3 From 3128df6be147768fe536986fbb85db1d37806a9f Mon Sep 17 00:00:00 2001 From: Alexandre Knecht Date: Sun, 28 Dec 2025 03:00:57 +0100 Subject: bridge: fix C-VLAN preservation in 802.1ad vlan_tunnel egress When using an 802.1ad bridge with vlan_tunnel, the C-VLAN tag is incorrectly stripped from frames during egress processing. br_handle_egress_vlan_tunnel() uses skb_vlan_pop() to remove the S-VLAN from hwaccel before VXLAN encapsulation. However, skb_vlan_pop() also moves any "next" VLAN from the payload into hwaccel: /* move next vlan tag to hw accel tag */ __skb_vlan_pop(skb, &vlan_tci); __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); For QinQ frames where the C-VLAN sits in the payload, this moves it to hwaccel where it gets lost during VXLAN encapsulation. Fix by calling __vlan_hwaccel_clear_tag() directly, which clears only the hwaccel S-VLAN and leaves the payload untouched. This path is only taken when vlan_tunnel is enabled and tunnel_info is configured, so 802.1Q bridges are unaffected. Tested with 802.1ad bridge + VXLAN vlan_tunnel, verified C-VLAN preserved in VXLAN payload via tcpdump. Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths") Signed-off-by: Alexandre Knecht Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20251228020057.2788865-1-knecht.alexandre@gmail.com Signed-off-by: Jakub Kicinski --- net/bridge/br_vlan_tunnel.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index a966a6ec8263..257cae9f1569 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -189,7 +189,6 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb, IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *tunnel_dst; __be64 tunnel_id; - int err; if (!vlan) return 0; @@ -199,9 +198,13 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb, return 0; skb_dst_drop(skb); - err = skb_vlan_pop(skb); - if (err) - return err; + /* For 802.1ad (QinQ), skb_vlan_pop() incorrectly moves the C-VLAN + * from payload to hwaccel after clearing S-VLAN. We only need to + * clear the hwaccel S-VLAN; the C-VLAN must stay in payload for + * correct VXLAN encapsulation. This is also correct for 802.1Q + * where no C-VLAN exists in payload. + */ + __vlan_hwaccel_clear_tag(skb); if (BR_INPUT_SKB_CB(skb)->backup_nhid) { __set_bit(IP_TUNNEL_KEY_BIT, flags); -- cgit v1.2.3 From 34f3ff52cb9fa7dbf04f5c734fcc4cb6ed5d1a95 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Thu, 25 Dec 2025 20:36:17 +0000 Subject: net: mscc: ocelot: Fix crash when adding interface under a lag Commit 15faa1f67ab4 ("lan966x: Fix crash when adding interface under a lag") fixed a similar issue in the lan966x driver caused by a NULL pointer dereference. The ocelot_set_aggr_pgids() function in the ocelot driver has similar logic and is susceptible to the same crash. This issue specifically affects the ocelot_vsc7514.c frontend, which leaves unused ports as NULL pointers. The felix_vsc9959.c frontend is unaffected as it uses the DSA framework which registers all ports. Fix this by checking if the port pointer is valid before accessing it. Fixes: 528d3f190c98 ("net: mscc: ocelot: drop the use of the "lags" array") Signed-off-by: Jerry Wu Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/tencent_75EF812B305E26B0869C673DD1160866C90A@qq.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 08bee56aea35..c345d9b17c89 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2307,14 +2307,16 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < ocelot->num_phys_ports; lag++) { - struct net_device *bond = ocelot->ports[lag]->bond; + struct ocelot_port *ocelot_port = ocelot->ports[lag]; int num_active_ports = 0; + struct net_device *bond; unsigned long bond_mask; u8 aggr_idx[16]; - if (!bond || (visited & BIT(lag))) + if (!ocelot_port || !ocelot_port->bond || (visited & BIT(lag))) continue; + bond = ocelot_port->bond; bond_mask = ocelot_get_bond_mask(ocelot, bond); for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) { -- cgit v1.2.3 From 4c0856c225b39b1def6c9a6bc56faca79550da13 Mon Sep 17 00:00:00 2001 From: "yuan.gao" Date: Wed, 24 Dec 2025 14:31:45 +0800 Subject: inet: ping: Fix icmp out counting When the ping program uses an IPPROTO_ICMP socket to send ICMP_ECHO messages, ICMP_MIB_OUTMSGS is counted twice. ping_v4_sendmsg ping_v4_push_pending_frames ip_push_pending_frames ip_finish_skb __ip_make_skb icmp_out_count(net, icmp_type); // first count icmp_out_count(sock_net(sk), user_icmph.type); // second count However, when the ping program uses an IPPROTO_RAW socket, ICMP_MIB_OUTMSGS is counted correctly only once. Therefore, the first count should be removed. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: yuan.gao Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20251224063145.3615282-1-yuan.gao@ucloud.cn Signed-off-by: Jakub Kicinski --- net/ipv4/ping.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index ad56588107cc..cfbd563498e8 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -828,10 +828,8 @@ out: out_free: if (free) kfree(ipc.opt); - if (!err) { - icmp_out_count(sock_net(sk), user_icmph.type); + if (!err) return len; - } return err; do_confirm: -- cgit v1.2.3 From 62f7edd59964eb588e96fce1ad35a2327ea54424 Mon Sep 17 00:00:00 2001 From: Stefano Radaelli Date: Tue, 23 Dec 2025 13:09:39 +0100 Subject: net: phy: mxl-86110: Add power management and soft reset support Implement soft_reset, suspend, and resume callbacks using genphy_soft_reset(), genphy_suspend(), and genphy_resume() to fix PHY initialization and power management issues. The soft_reset callback is needed to properly recover the PHY after an ifconfig down/up cycle. Without it, the PHY can remain in power-down state, causing MDIO register access failures during config_init(). The soft reset ensures the PHY is operational before configuration. The suspend/resume callbacks enable proper power management during system suspend/resume cycles. Fixes: b2908a989c59 ("net: phy: add driver for MaxLinear MxL86110 PHY") Signed-off-by: Stefano Radaelli Link: https://patch.msgid.link/20251223120940.407195-1-stefano.r@variscite.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mxl-86110.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/mxl-86110.c b/drivers/net/phy/mxl-86110.c index e5d137a37a1d..42a5fe3f115f 100644 --- a/drivers/net/phy/mxl-86110.c +++ b/drivers/net/phy/mxl-86110.c @@ -938,6 +938,9 @@ static struct phy_driver mxl_phy_drvs[] = { PHY_ID_MATCH_EXACT(PHY_ID_MXL86110), .name = "MXL86110 Gigabit Ethernet", .config_init = mxl86110_config_init, + .suspend = genphy_suspend, + .resume = genphy_resume, + .soft_reset = genphy_soft_reset, .get_wol = mxl86110_get_wol, .set_wol = mxl86110_set_wol, .led_brightness_set = mxl86110_led_brightness_set, -- cgit v1.2.3 From 2a71a1a8d0ed718b1c7a9ac61f07e5755c47ae20 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Wed, 24 Dec 2025 04:35:35 +0800 Subject: net: sock: fix hardened usercopy panic in sock_recv_errqueue skbuff_fclone_cache was created without defining a usercopy region, [1] unlike skbuff_head_cache which properly whitelists the cb[] field. [2] This causes a usercopy BUG() when CONFIG_HARDENED_USERCOPY is enabled and the kernel attempts to copy sk_buff.cb data to userspace via sock_recv_errqueue() -> put_cmsg(). The crash occurs when: 1. TCP allocates an skb using alloc_skb_fclone() (from skbuff_fclone_cache) [1] 2. The skb is cloned via skb_clone() using the pre-allocated fclone [3] 3. The cloned skb is queued to sk_error_queue for timestamp reporting 4. Userspace reads the error queue via recvmsg(MSG_ERRQUEUE) 5. sock_recv_errqueue() calls put_cmsg() to copy serr->ee from skb->cb [4] 6. __check_heap_object() fails because skbuff_fclone_cache has no usercopy whitelist [5] When cloned skbs allocated from skbuff_fclone_cache are used in the socket error queue, accessing the sock_exterr_skb structure in skb->cb via put_cmsg() triggers a usercopy hardening violation: [ 5.379589] usercopy: Kernel memory exposure attempt detected from SLUB object 'skbuff_fclone_cache' (offset 296, size 16)! [ 5.382796] kernel BUG at mm/usercopy.c:102! [ 5.383923] Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI [ 5.384903] CPU: 1 UID: 0 PID: 138 Comm: poc_put_cmsg Not tainted 6.12.57 #7 [ 5.384903] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 5.384903] RIP: 0010:usercopy_abort+0x6c/0x80 [ 5.384903] Code: 1a 86 51 48 c7 c2 40 15 1a 86 41 52 48 c7 c7 c0 15 1a 86 48 0f 45 d6 48 c7 c6 80 15 1a 86 48 89 c1 49 0f 45 f3 e8 84 27 88 ff <0f> 0b 490 [ 5.384903] RSP: 0018:ffffc900006f77a8 EFLAGS: 00010246 [ 5.384903] RAX: 000000000000006f RBX: ffff88800f0ad2a8 RCX: 1ffffffff0f72e74 [ 5.384903] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff87b973a0 [ 5.384903] RBP: 0000000000000010 R08: 0000000000000000 R09: fffffbfff0f72e74 [ 5.384903] R10: 0000000000000003 R11: 79706f6372657375 R12: 0000000000000001 [ 5.384903] R13: ffff88800f0ad2b8 R14: ffffea00003c2b40 R15: ffffea00003c2b00 [ 5.384903] FS: 0000000011bc4380(0000) GS:ffff8880bf100000(0000) knlGS:0000000000000000 [ 5.384903] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5.384903] CR2: 000056aa3b8e5fe4 CR3: 000000000ea26004 CR4: 0000000000770ef0 [ 5.384903] PKRU: 55555554 [ 5.384903] Call Trace: [ 5.384903] [ 5.384903] __check_heap_object+0x9a/0xd0 [ 5.384903] __check_object_size+0x46c/0x690 [ 5.384903] put_cmsg+0x129/0x5e0 [ 5.384903] sock_recv_errqueue+0x22f/0x380 [ 5.384903] tls_sw_recvmsg+0x7ed/0x1960 [ 5.384903] ? srso_alias_return_thunk+0x5/0xfbef5 [ 5.384903] ? schedule+0x6d/0x270 [ 5.384903] ? srso_alias_return_thunk+0x5/0xfbef5 [ 5.384903] ? mutex_unlock+0x81/0xd0 [ 5.384903] ? __pfx_mutex_unlock+0x10/0x10 [ 5.384903] ? __pfx_tls_sw_recvmsg+0x10/0x10 [ 5.384903] ? _raw_spin_lock_irqsave+0x8f/0xf0 [ 5.384903] ? _raw_read_unlock_irqrestore+0x20/0x40 [ 5.384903] ? srso_alias_return_thunk+0x5/0xfbef5 The crash offset 296 corresponds to skb2->cb within skbuff_fclones: - sizeof(struct sk_buff) = 232 - offsetof(struct sk_buff, cb) = 40 - offset of skb2.cb in fclones = 232 + 40 = 272 - crash offset 296 = 272 + 24 (inside sock_exterr_skb.ee) This patch uses a local stack variable as a bounce buffer to avoid the hardened usercopy check failure. [1] https://elixir.bootlin.com/linux/v6.12.62/source/net/ipv4/tcp.c#L885 [2] https://elixir.bootlin.com/linux/v6.12.62/source/net/core/skbuff.c#L5104 [3] https://elixir.bootlin.com/linux/v6.12.62/source/net/core/skbuff.c#L5566 [4] https://elixir.bootlin.com/linux/v6.12.62/source/net/core/skbuff.c#L5491 [5] https://elixir.bootlin.com/linux/v6.12.62/source/mm/slub.c#L5719 Fixes: 6d07d1cd300f ("usercopy: Restrict non-usercopy caches to size 0") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251223203534.1392218-2-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- net/core/sock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 45c98bf524b2..a1c8b47b0d56 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3896,7 +3896,7 @@ void sock_enable_timestamp(struct sock *sk, enum sock_flags flag) int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type) { - struct sock_exterr_skb *serr; + struct sock_extended_err ee; struct sk_buff *skb; int copied, err; @@ -3916,8 +3916,9 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, sock_recv_timestamp(msg, sk, skb); - serr = SKB_EXT_ERR(skb); - put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee); + /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */ + ee = SKB_EXT_ERR(skb)->ee; + put_cmsg(msg, level, type, sizeof(ee), &ee); msg->msg_flags |= MSG_ERRQUEUE; err = copied; -- cgit v1.2.3 From 02d1e1a3f9239cdb3ecf2c6d365fb959d1bf39df Mon Sep 17 00:00:00 2001 From: Di Zhu Date: Wed, 24 Dec 2025 09:22:24 +0800 Subject: netdev: preserve NETIF_F_ALL_FOR_ALL across TSO updates Directly increment the TSO features incurs a side effect: it will also directly clear the flags in NETIF_F_ALL_FOR_ALL on the master device, which can cause issues such as the inability to enable the nocache copy feature on the bonding driver. The fix is to include NETIF_F_ALL_FOR_ALL in the update mask, thereby preventing it from being cleared. Fixes: b0ce3508b25e ("bonding: allow TSO being set on bonding master") Signed-off-by: Di Zhu Link: https://patch.msgid.link/20251224012224.56185-1-zhud@hygon.cn Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5870a9e514a5..d99b0fbc1942 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5323,7 +5323,8 @@ netdev_features_t netdev_increment_features(netdev_features_t all, static inline netdev_features_t netdev_add_tso_features(netdev_features_t features, netdev_features_t mask) { - return netdev_increment_features(features, NETIF_F_ALL_TSO, mask); + return netdev_increment_features(features, NETIF_F_ALL_TSO | + NETIF_F_ALL_FOR_ALL, mask); } int __netdev_update_features(struct net_device *dev); -- cgit v1.2.3 From 31057979cdadfee9f934746fd84046b43506ba61 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 25 Dec 2025 15:27:13 +0200 Subject: net/mlx5: Lag, multipath, give priority for routes with smaller network prefix Today multipath offload is controlled by a single route and the route controlling is selected if it meets one of the following criteria: 1. No controlling route is set. 2. New route destination is the same as old one. 3. New route metric is lower than old route metric. This can cause unwanted behaviour in case a new route is added with a smaller network prefix which should get the priority. Fix this by adding a new criteria to give priority to new route with a smaller network prefix. Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Patrisious Haddad Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20251225132717.358820-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index aee17fcf3b36..cdc99fe5c956 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -173,10 +173,15 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, } /* Handle multipath entry with lower priority value */ - if (mp->fib.mfi && mp->fib.mfi != fi && + if (mp->fib.mfi && (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) && - fi->fib_priority >= mp->fib.priority) + mp->fib.dst_len <= fen_info->dst_len && + !(mp->fib.dst_len == fen_info->dst_len && + fi->fib_priority < mp->fib.priority)) { + mlx5_core_dbg(ldev->pf[idx].dev, + "Multipath entry with lower priority was rejected\n"); return; + } nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL); nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0); -- cgit v1.2.3 From 6c75dc9de40ff91ec2b621b78f6cd9031762067c Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Thu, 25 Dec 2025 15:27:14 +0200 Subject: net/mlx5e: Don't gate FEC histograms on ppcnt_statistical_group Currently, the ppcnt_statistical_group capability check incorrectly gates access to FEC histogram statistics. This capability applies only to statistical and physical counter groups, not for histogram data. Restrict the ppcnt_statistical_group check to the Physical_Layer_Counters and Physical_Layer_Statistical_Counters groups. Histogram statistics access remains gated by the pphcr capability. The issue is harmless as of today, as it happens that ppcnt_statistical_group is set on all existing devices that have pphcr set. Fixes: 6b81b8a0b197 ("net/mlx5e: Don't query FEC statistics when FEC is disabled") Signed-off-by: Alexei Lazar Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20251225132717.358820-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index a2802cfc9b98..a8af84fc9763 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1608,12 +1608,13 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv, { int mode = fec_active_mode(priv->mdev); - if (mode == MLX5E_FEC_NOFEC || - !MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) + if (mode == MLX5E_FEC_NOFEC) return; - fec_set_corrected_bits_total(priv, fec_stats); - fec_set_block_stats(priv, mode, fec_stats); + if (MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) { + fec_set_corrected_bits_total(priv, fec_stats); + fec_set_block_stats(priv, mode, fec_stats); + } if (MLX5_CAP_PCAM_REG(priv->mdev, pphcr)) fec_set_histograms_stats(priv, mode, hist); -- cgit v1.2.3 From 7d36a4a8bf62dc508bc6bb4b59727aec25064ca5 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 25 Dec 2025 15:27:15 +0200 Subject: net/mlx5e: Fix NULL pointer dereference in ioctl module EEPROM query The mlx5_query_mcia() function unconditionally dereferences the status pointer to store the MCIA register status value. However, mlx5e_get_module_id() passes NULL since it doesn't need the status value. Add a NULL check before dereferencing the status pointer to prevent a NULL pointer dereference. Fixes: 2e4c44b12f4d ("net/mlx5: Refactor EEPROM query error handling to return status separately") Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Reviewed-by: Dragos Tatulea Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20251225132717.358820-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 85a9e534f442..8f36454dd196 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -393,9 +393,11 @@ static int mlx5_query_mcia(struct mlx5_core_dev *dev, if (err) return err; - *status = MLX5_GET(mcia_reg, out, status); - if (*status) + if (MLX5_GET(mcia_reg, out, status)) { + if (status) + *status = MLX5_GET(mcia_reg, out, status); return -EIO; + } ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); memcpy(data, ptr, size); -- cgit v1.2.3 From 144297e2a24e3e54aee1180ec21120ea38822b97 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 25 Dec 2025 15:27:16 +0200 Subject: net/mlx5e: Don't print error message due to invalid module Dumping module EEPROM on newer modules is supported through the netlink interface only. Querying with old userspace ethtool (or other tools, such as 'lshw') which still uses the ioctl interface results in an error message that could flood dmesg (in addition to the expected error return value). The original message was added under the assumption that the driver should be able to handle all module types, but now that such flows are easily triggered from userspace, it doesn't serve its purpose. Change the log level of the print in mlx5_query_module_eeprom() to debug. Fixes: bb64143eee8c ("net/mlx5e: Add ethtool support for dump module EEPROM") Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20251225132717.358820-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 8f36454dd196..7f8bed353e67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -431,7 +431,8 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); break; default: - mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); + mlx5_core_dbg(dev, "Module ID not recognized: 0x%x\n", + module_id); return -EINVAL; } -- cgit v1.2.3 From 0462a15d2d1fafd3d48cf3c7c67393e42d03908c Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Thu, 25 Dec 2025 15:27:17 +0200 Subject: net/mlx5e: Dealloc forgotten PSP RX modify header The commit which added RX steering rules for PSP forgot to free a modify header HW object on the cleanup path, which lead to health errors when reloading the driver and uninitializing the device: mlx5_core 0000:08:00.0: poll_health:803:(pid 3021): Fatal error 3 detected Fix that by saving the modify header pointer in the PSP steering struct and deallocating it after freeing the rule which references it. Fixes: 9536fbe10c9d ("net/mlx5e: Add PSP steering in local NIC RX") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20251225132717.358820-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c index 38e7c77cc851..9a74438ce10a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c @@ -44,6 +44,7 @@ struct mlx5e_accel_fs_psp_prot { struct mlx5_flow_table *ft; struct mlx5_flow_group *miss_group; struct mlx5_flow_handle *miss_rule; + struct mlx5_modify_hdr *rx_modify_hdr; struct mlx5_flow_destination default_dest; struct mlx5e_psp_rx_err rx_err; u32 refcnt; @@ -286,13 +287,19 @@ out_err: return err; } -static void accel_psp_fs_rx_fs_destroy(struct mlx5e_accel_fs_psp_prot *fs_prot) +static void accel_psp_fs_rx_fs_destroy(struct mlx5e_psp_fs *fs, + struct mlx5e_accel_fs_psp_prot *fs_prot) { if (fs_prot->def_rule) { mlx5_del_flow_rules(fs_prot->def_rule); fs_prot->def_rule = NULL; } + if (fs_prot->rx_modify_hdr) { + mlx5_modify_header_dealloc(fs->mdev, fs_prot->rx_modify_hdr); + fs_prot->rx_modify_hdr = NULL; + } + if (fs_prot->miss_rule) { mlx5_del_flow_rules(fs_prot->miss_rule); fs_prot->miss_rule = NULL; @@ -396,6 +403,7 @@ static int accel_psp_fs_rx_create_ft(struct mlx5e_psp_fs *fs, modify_hdr = NULL; goto out_err; } + fs_prot->rx_modify_hdr = modify_hdr; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | @@ -416,7 +424,7 @@ static int accel_psp_fs_rx_create_ft(struct mlx5e_psp_fs *fs, goto out; out_err: - accel_psp_fs_rx_fs_destroy(fs_prot); + accel_psp_fs_rx_fs_destroy(fs, fs_prot); out: kvfree(flow_group_in); kvfree(spec); @@ -433,7 +441,7 @@ static int accel_psp_fs_rx_destroy(struct mlx5e_psp_fs *fs, enum accel_fs_psp_ty /* The netdev unreg already happened, so all offloaded rule are already removed */ fs_prot = &accel_psp->fs_prot[type]; - accel_psp_fs_rx_fs_destroy(fs_prot); + accel_psp_fs_rx_fs_destroy(fs, fs_prot); accel_psp_fs_rx_err_destroy_ft(fs, &fs_prot->rx_err); -- cgit v1.2.3 From 8da901ffe497a53fa4ecc3ceed0e6d771586f88e Mon Sep 17 00:00:00 2001 From: Frank Liang Date: Wed, 31 Dec 2025 22:58:08 +0800 Subject: net/ena: fix missing lock when update devlink params Fix assert lock warning while calling devl_param_driverinit_value_set() in ena. WARNING: net/devlink/core.c:261 at devl_assert_locked+0x62/0x90, CPU#0: kworker/0:0/9 CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.19.0-rc2+ #1 PREEMPT(lazy) Hardware name: Amazon EC2 m8i-flex.4xlarge/, BIOS 1.0 10/16/2017 Workqueue: events work_for_cpu_fn RIP: 0010:devl_assert_locked+0x62/0x90 Call Trace: devl_param_driverinit_value_set+0x15/0x1c0 ena_devlink_alloc+0x18c/0x220 [ena] ? __pfx_ena_devlink_alloc+0x10/0x10 [ena] ? trace_hardirqs_on+0x18/0x140 ? lockdep_hardirqs_on+0x8c/0x130 ? __raw_spin_unlock_irqrestore+0x5d/0x80 ? __raw_spin_unlock_irqrestore+0x46/0x80 ? devm_ioremap_wc+0x9a/0xd0 ena_probe+0x4d2/0x1b20 [ena] ? __lock_acquire+0x56a/0xbd0 ? __pfx_ena_probe+0x10/0x10 [ena] ? local_clock+0x15/0x30 ? __lock_release.isra.0+0x1c9/0x340 ? mark_held_locks+0x40/0x70 ? lockdep_hardirqs_on_prepare.part.0+0x92/0x170 ? trace_hardirqs_on+0x18/0x140 ? lockdep_hardirqs_on+0x8c/0x130 ? __raw_spin_unlock_irqrestore+0x5d/0x80 ? __raw_spin_unlock_irqrestore+0x46/0x80 ? __pfx_ena_probe+0x10/0x10 [ena] ...... Fixes: 816b52624cf6 ("net: ena: Control PHC enable through devlink") Signed-off-by: Frank Liang Reviewed-by: David Arinzon Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251231145808.6103-1-xiliang@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_devlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_devlink.c b/drivers/net/ethernet/amazon/ena/ena_devlink.c index ac81c24016dd..4772185e669d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_devlink.c +++ b/drivers/net/ethernet/amazon/ena/ena_devlink.c @@ -53,10 +53,12 @@ void ena_devlink_disable_phc_param(struct devlink *devlink) { union devlink_param_value value; + devl_lock(devlink); value.vbool = false; devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, value); + devl_unlock(devlink); } static void ena_devlink_port_register(struct devlink *devlink) @@ -145,10 +147,12 @@ static int ena_devlink_configure_params(struct devlink *devlink) return rc; } + devl_lock(devlink); value.vbool = ena_phc_is_enabled(adapter); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, value); + devl_unlock(devlink); return 0; } -- cgit v1.2.3 From 92e6e0a87f6860a4710f9494f8c704d498ae60f8 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Tue, 30 Dec 2025 07:18:53 +0000 Subject: net: wwan: iosm: Fix memory leak in ipc_mux_deinit() Commit 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support") allocated memory for pp_qlt in ipc_mux_init() but did not free it in ipc_mux_deinit(). This results in a memory leak when the driver is unloaded. Free the allocated memory in ipc_mux_deinit() to fix the leak. Fixes: 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support") Co-developed-by: Jianhao Xu Signed-off-by: Jianhao Xu Signed-off-by: Zilin Guan Reviewed-by: Loic Poulain Link: https://patch.msgid.link/20251230071853.1062223-1-zilin@seu.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/wwan/iosm/iosm_ipc_mux.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.c b/drivers/net/wwan/iosm/iosm_ipc_mux.c index fc928b298a98..b846889fcb09 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux.c @@ -456,6 +456,7 @@ void ipc_mux_deinit(struct iosm_mux *ipc_mux) struct sk_buff_head *free_list; union mux_msg mux_msg; struct sk_buff *skb; + int i; if (!ipc_mux->initialized) return; @@ -479,5 +480,10 @@ void ipc_mux_deinit(struct iosm_mux *ipc_mux) ipc_mux->channel->dl_pipe.is_open = false; } + if (ipc_mux->protocol != MUX_LITE) { + for (i = 0; i < IPC_MEM_MUX_IP_SESSION_ENTRIES; i++) + kfree(ipc_mux->ul_adb.pp_qlt[i]); + } + kfree(ipc_mux); } -- cgit v1.2.3 From ffeafa65b2b26df2f5b5a6118d3174f17bd12ec5 Mon Sep 17 00:00:00 2001 From: Srijit Bose Date: Wed, 31 Dec 2025 00:36:25 -0800 Subject: bnxt_en: Fix potential data corruption with HW GRO/LRO Fix the max number of bits passed to find_first_zero_bit() in bnxt_alloc_agg_idx(). We were incorrectly passing the number of long words. find_first_zero_bit() may fail to find a zero bit and cause a wrong ID to be used. If the wrong ID is already in use, this can cause data corruption. Sometimes an error like this can also be seen: bnxt_en 0000:83:00.0 enp131s0np0: TPA end agg_buf 2 != expected agg_bufs 1 Fix it by passing the correct number of bits MAX_TPA_P5. Use DECLARE_BITMAP() to more cleanly define the bitmap. Add a sanity check to warn if a bit cannot be found and reset the ring [MChan]. Fixes: ec4d8e7cf024 ("bnxt_en: Add TPA ID mapping logic for 57500 chips.") Reviewed-by: Ray Jui Signed-off-by: Srijit Bose Signed-off-by: Michael Chan Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251231083625.3911652-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 ++++++++++++--- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 +--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d17d0ea89c36..d160e54ac121 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1482,9 +1482,11 @@ static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id) struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map; u16 idx = agg_id & MAX_TPA_P5_MASK; - if (test_bit(idx, map->agg_idx_bmap)) - idx = find_first_zero_bit(map->agg_idx_bmap, - BNXT_AGG_IDX_BMAP_SIZE); + if (test_bit(idx, map->agg_idx_bmap)) { + idx = find_first_zero_bit(map->agg_idx_bmap, MAX_TPA_P5); + if (idx >= MAX_TPA_P5) + return INVALID_HW_RING_ID; + } __set_bit(idx, map->agg_idx_bmap); map->agg_id_tbl[agg_id] = idx; return idx; @@ -1548,6 +1550,13 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { agg_id = TPA_START_AGG_ID_P5(tpa_start); agg_id = bnxt_alloc_agg_idx(rxr, agg_id); + if (unlikely(agg_id == INVALID_HW_RING_ID)) { + netdev_warn(bp->dev, "Unable to allocate agg ID for ring %d, agg 0x%x\n", + rxr->bnapi->index, + TPA_START_AGG_ID_P5(tpa_start)); + bnxt_sched_reset_rxr(bp, rxr); + return; + } } else { agg_id = TPA_START_AGG_ID(tpa_start); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index f5f07a7e6b29..f88e7769a838 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1080,11 +1080,9 @@ struct bnxt_tpa_info { struct rx_agg_cmp *agg_arr; }; -#define BNXT_AGG_IDX_BMAP_SIZE (MAX_TPA_P5 / BITS_PER_LONG) - struct bnxt_tpa_idx_map { u16 agg_id_tbl[1024]; - unsigned long agg_idx_bmap[BNXT_AGG_IDX_BMAP_SIZE]; + DECLARE_BITMAP(agg_idx_bmap, MAX_TPA_P5); }; struct bnxt_rx_ring_info { -- cgit v1.2.3 From acb4bc6e1ba34ae1a34a9334a1ce8474c909466e Mon Sep 17 00:00:00 2001 From: Kommula Shiva Shankar Date: Fri, 2 Jan 2026 15:49:00 +0530 Subject: virtio_net: fix device mismatch in devm_kzalloc/devm_kfree Initial rss_hdr allocation uses virtio_device->device, but virtnet_set_queues() frees using net_device->device. This device mismatch causing below devres warning [ 3788.514041] ------------[ cut here ]------------ [ 3788.514044] WARNING: drivers/base/devres.c:1095 at devm_kfree+0x84/0x98, CPU#16: vdpa/1463 [ 3788.514054] Modules linked in: octep_vdpa virtio_net virtio_vdpa [last unloaded: virtio_vdpa] [ 3788.514064] CPU: 16 UID: 0 PID: 1463 Comm: vdpa Tainted: G W 6.18.0 #10 PREEMPT [ 3788.514067] Tainted: [W]=WARN [ 3788.514069] Hardware name: Marvell CN106XX board (DT) [ 3788.514071] pstate: 63400009 (nZCv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) [ 3788.514074] pc : devm_kfree+0x84/0x98 [ 3788.514076] lr : devm_kfree+0x54/0x98 [ 3788.514079] sp : ffff800084e2f220 [ 3788.514080] x29: ffff800084e2f220 x28: ffff0003b2366000 x27: 000000000000003f [ 3788.514085] x26: 000000000000003f x25: ffff000106f17c10 x24: 0000000000000080 [ 3788.514089] x23: ffff00045bb8ab08 x22: ffff00045bb8a000 x21: 0000000000000018 [ 3788.514093] x20: ffff0004355c3080 x19: ffff00045bb8aa00 x18: 0000000000080000 [ 3788.514098] x17: 0000000000000040 x16: 000000000000001f x15: 000000000007ffff [ 3788.514102] x14: 0000000000000488 x13: 0000000000000005 x12: 00000000000fffff [ 3788.514106] x11: ffffffffffffffff x10: 0000000000000005 x9 : ffff800080c8c05c [ 3788.514110] x8 : ffff800084e2eeb8 x7 : 0000000000000000 x6 : 000000000000003f [ 3788.514115] x5 : ffff8000831bafe0 x4 : ffff800080c8b010 x3 : ffff0004355c3080 [ 3788.514119] x2 : ffff0004355c3080 x1 : 0000000000000000 x0 : 0000000000000000 [ 3788.514123] Call trace: [ 3788.514125] devm_kfree+0x84/0x98 (P) [ 3788.514129] virtnet_set_queues+0x134/0x2e8 [virtio_net] [ 3788.514135] virtnet_probe+0x9c0/0xe00 [virtio_net] [ 3788.514139] virtio_dev_probe+0x1e0/0x338 [ 3788.514144] really_probe+0xc8/0x3a0 [ 3788.514149] __driver_probe_device+0x84/0x170 [ 3788.514152] driver_probe_device+0x44/0x120 [ 3788.514155] __device_attach_driver+0xc4/0x168 [ 3788.514158] bus_for_each_drv+0x8c/0xf0 [ 3788.514161] __device_attach+0xa4/0x1c0 [ 3788.514164] device_initial_probe+0x1c/0x30 [ 3788.514168] bus_probe_device+0xb4/0xc0 [ 3788.514170] device_add+0x614/0x828 [ 3788.514173] register_virtio_device+0x214/0x258 [ 3788.514175] virtio_vdpa_probe+0xa0/0x110 [virtio_vdpa] [ 3788.514179] vdpa_dev_probe+0xa8/0xd8 [ 3788.514183] really_probe+0xc8/0x3a0 [ 3788.514186] __driver_probe_device+0x84/0x170 [ 3788.514189] driver_probe_device+0x44/0x120 [ 3788.514192] __device_attach_driver+0xc4/0x168 [ 3788.514195] bus_for_each_drv+0x8c/0xf0 [ 3788.514197] __device_attach+0xa4/0x1c0 [ 3788.514200] device_initial_probe+0x1c/0x30 [ 3788.514203] bus_probe_device+0xb4/0xc0 [ 3788.514206] device_add+0x614/0x828 [ 3788.514209] _vdpa_register_device+0x58/0x88 [ 3788.514211] octep_vdpa_dev_add+0x104/0x228 [octep_vdpa] [ 3788.514215] vdpa_nl_cmd_dev_add_set_doit+0x2d0/0x3c0 [ 3788.514218] genl_family_rcv_msg_doit+0xe4/0x158 [ 3788.514222] genl_rcv_msg+0x218/0x298 [ 3788.514225] netlink_rcv_skb+0x64/0x138 [ 3788.514229] genl_rcv+0x40/0x60 [ 3788.514233] netlink_unicast+0x32c/0x3b0 [ 3788.514237] netlink_sendmsg+0x170/0x3b8 [ 3788.514241] __sys_sendto+0x12c/0x1c0 [ 3788.514246] __arm64_sys_sendto+0x30/0x48 [ 3788.514249] invoke_syscall.constprop.0+0x58/0xf8 [ 3788.514255] do_el0_svc+0x48/0xd0 [ 3788.514259] el0_svc+0x48/0x210 [ 3788.514264] el0t_64_sync_handler+0xa0/0xe8 [ 3788.514268] el0t_64_sync+0x198/0x1a0 [ 3788.514271] ---[ end trace 0000000000000000 ]--- Fix by using virtio_device->device consistently for allocation and deallocation Fixes: 4944be2f5ad8c ("virtio_net: Allocate rss_hdr with devres") Signed-off-by: Kommula Shiva Shankar Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20260102101900.692770-1-kshankar@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1bb3aeca66c6..22d894101c01 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3791,7 +3791,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) if (vi->has_rss && !netif_is_rxfh_configured(dev)) { old_rss_hdr = vi->rss_hdr; old_rss_trailer = vi->rss_trailer; - vi->rss_hdr = devm_kzalloc(&dev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); + vi->rss_hdr = devm_kzalloc(&vi->vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL); if (!vi->rss_hdr) { vi->rss_hdr = old_rss_hdr; return -ENOMEM; @@ -3802,7 +3802,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) if (!virtnet_commit_rss_command(vi)) { /* restore ctrl_rss if commit_rss_command failed */ - devm_kfree(&dev->dev, vi->rss_hdr); + devm_kfree(&vi->vdev->dev, vi->rss_hdr); vi->rss_hdr = old_rss_hdr; vi->rss_trailer = old_rss_trailer; @@ -3810,7 +3810,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) queue_pairs); return -EINVAL; } - devm_kfree(&dev->dev, old_rss_hdr); + devm_kfree(&vi->vdev->dev, old_rss_hdr); goto succ; } -- cgit v1.2.3 From 2ef02ac38d3c17f34a00c4b267d961a8d4b45d1a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 2 Jan 2026 15:00:07 +0100 Subject: inet: frags: drop fraglist conntrack references Jakub added a warning in nf_conntrack_cleanup_net_list() to make debugging leaked skbs/conntrack references more obvious. syzbot reports this as triggering, and I can also reproduce this via ip_defrag.sh selftest: conntrack cleanup blocked for 60s WARNING: net/netfilter/nf_conntrack_core.c:2512 [..] conntrack clenups gets stuck because there are skbs with still hold nf_conn references via their frag_list. net.core.skb_defer_max=0 makes the hang disappear. Eric Dumazet points out that skb_release_head_state() doesn't follow the fraglist. ip_defrag.sh can only reproduce this problem since commit 6471658dc66c ("udp: use skb_attempt_defer_free()"), but AFAICS this problem could happen with TCP as well if pmtu discovery is off. The relevant problem path for udp is: 1. netns emits fragmented packets 2. nf_defrag_v6_hook reassembles them (in output hook) 3. reassembled skb is tracked (skb owns nf_conn reference) 4. ip6_output refragments 5. refragmented packets also own nf_conn reference (ip6_fragment calls ip6_copy_metadata()) 6. on input path, nf_defrag_v6_hook skips defragmentation: the fragments already have skb->nf_conn attached 7. skbs are reassembled via ipv6_frag_rcv() 8. skb_consume_udp -> skb_attempt_defer_free() -> skb ends up in pcpu freelist, but still has nf_conn reference. Possible solutions: 1 let defrag engine drop nf_conn entry, OR 2 export kick_defer_list_purge() and call it from the conntrack netns exit callback, OR 3 add skb_has_frag_list() check to skb_attempt_defer_free() 2 & 3 also solve ip_defrag.sh hang but share same drawback: Such reassembled skbs, queued to socket, can prevent conntrack module removal until userspace has consumed the packet. While both tcp and udp stack do call nf_reset_ct() before placing skb on socket queue, that function doesn't iterate frag_list skbs. Therefore drop nf_conn entries when they are placed in defrag queue. Keep the nf_conn entry of the first (offset 0) skb so that reassembled skb retains nf_conn entry for sake of TX path. Note that fixes tag is incorrect; it points to the commit introducing the 'ip_defrag.sh reproducible problem': no need to backport this patch to every stable kernel. Reported-by: syzbot+4393c47753b7808dac7d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/693b0fa7.050a0220.4004e.040d.GAE@google.com/ Fixes: 6471658dc66c ("udp: use skb_attempt_defer_free()") Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260102140030.32367-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/ipv4/inet_fragment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 001ee5c4d962..4e6d7467ed44 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -488,6 +488,8 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, } FRAG_CB(skb)->ip_defrag_offset = offset; + if (offset) + nf_reset_ct(skb); return IPFRAG_OK; } -- cgit v1.2.3 From 1806d210e5a8f431ad4711766ae4a333d407d972 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Sat, 3 Jan 2026 17:53:31 +0100 Subject: MAINTAINERS: Update email address for Justin Iurman Due to a change of employer, I'll be using a permanent and personal email address. Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20260103165331.20120-1-justin.iurman@gmail.com Signed-off-by: Jakub Kicinski --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 7a6110d0e46d..7354436a19c0 100644 --- a/.mailmap +++ b/.mailmap @@ -416,6 +416,7 @@ Juha Yrjola Juha Yrjola Juha Yrjola Julien Thierry +Justin Iurman Iskren Chernev Kalle Valo Kalle Valo diff --git a/MAINTAINERS b/MAINTAINERS index 765ad2daa218..410fd1f199f2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18283,7 +18283,7 @@ X: net/wireless/ X: tools/testing/selftests/net/can/ NETWORKING [IOAM] -M: Justin Iurman +M: Justin Iurman S: Maintained F: Documentation/networking/ioam6* F: include/linux/ioam6* -- cgit v1.2.3 From ce5e612dd411de096aa041b9e9325ba1bec5f9f4 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 29 Dec 2025 20:43:10 +0100 Subject: vsock: Make accept()ed sockets use custom setsockopt() SO_ZEROCOPY handling in vsock_connectible_setsockopt() does not get called on accept()ed sockets due to a missing flag. Flip it. Fixes: e0718bd82e27 ("vsock: enable setting SO_ZEROCOPY") Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20251229-vsock-child-sock-custom-sockopt-v2-1-64778d6c4f88@rbox.co Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index adcba1b7bf74..a3505a4dcee0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1787,6 +1787,10 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, } else { newsock->state = SS_CONNECTED; sock_graft(connected, newsock); + + set_bit(SOCK_CUSTOM_SOCKOPT, + &connected->sk_socket->flags); + if (vsock_msgzerocopy_allow(vconnected->transport)) set_bit(SOCK_SUPPORT_ZC, &connected->sk_socket->flags); -- cgit v1.2.3 From caa20e9e155b9d2afcf658e2909659c0be45ec12 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 29 Dec 2025 20:43:11 +0100 Subject: vsock/test: Test setting SO_ZEROCOPY on accept()ed socket Make sure setsockopt(SOL_SOCKET, SO_ZEROCOPY) on an accept()ed socket is handled by vsock's implementation. Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20251229-vsock-child-sock-custom-sockopt-v2-2-64778d6c4f88@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 9e1250790f33..bbe3723babdc 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -2192,6 +2192,33 @@ static void test_stream_nolinger_server(const struct test_opts *opts) close(fd); } +static void test_stream_accepted_setsockopt_client(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_accepted_setsockopt_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -2371,6 +2398,11 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_unread_bytes_client, .run_server = test_seqpacket_unread_bytes_server, }, + { + .name = "SOCK_STREAM accept()ed socket custom setsockopt()", + .run_client = test_stream_accepted_setsockopt_client, + .run_server = test_stream_accepted_setsockopt_server, + }, {}, }; -- cgit v1.2.3 From 9892353726ad222219aa18c329e3a3636134dd56 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 1 Jan 2026 08:56:07 -0500 Subject: net/sched: act_mirred: Fix leak when redirecting to self on egress Whenever a mirred redirect to self on egress happens, mirred allocates a new skb (skb_to_send). The loop to self check was done after that allocation, but was not freeing the newly allocated skb, causing a leak. Fix this by moving the if-statement to before the allocation of the new skb. The issue was found by running the accompanying tdc test in 2/2 with config kmemleak enabled. After a few minutes the kmemleak thread ran and reported the leak coming from mirred. Fixes: 1d856251a009 ("net/sched: act_mirred: fix loop detection") Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260101135608.253079-2-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/act_mirred.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 91c96cc625bd..05e0b14b5773 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -266,11 +266,22 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, goto err_cant_do; } + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + + at_ingress = skb_at_tc_ingress(skb); + if (dev == skb->dev && want_ingress == at_ingress) { + pr_notice_once("tc mirred: Loop (%s:%s --> %s:%s)\n", + netdev_name(skb->dev), + at_ingress ? "ingress" : "egress", + netdev_name(dev), + want_ingress ? "ingress" : "egress"); + goto err_cant_do; + } + /* we could easily avoid the clone only if called by ingress and clsact; * since we can't easily detect the clsact caller, skip clone only for * ingress - that covers the TC S/W datapath. */ - at_ingress = skb_at_tc_ingress(skb); dont_clone = skb_at_tc_ingress(skb) && is_redirect && tcf_mirred_can_reinsert(retval); if (!dont_clone) { @@ -279,17 +290,6 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, goto err_cant_do; } - want_ingress = tcf_mirred_act_wants_ingress(m_eaction); - - if (dev