aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jakub Kicinski <kuba@kernel.org> 2026-05-29 12:57:22 -0700
committer Jakub Kicinski <kuba@kernel.org> 2026-05-29 12:57:23 -0700
commit c84ff04def255edb51e57c9f969efdfade0da16a (patch)
tree d9d91746256c5170bf2ac70b129abcfc3c887c2e
parent ff6e798c2eac3ebd0501ad7e796f583fab928de8 (diff)
parent 6851161feb01cea41358c9ec304bd2f981fc8505 (diff)
Merge tag 'ipsec-2026-05-29' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
Steffen Klassert says: ==================== pull request (net): ipsec 2026-05-29 1) xfrm: route MIGRATE notifications to caller's netns Thread the caller's netns through km_migrate() so that MIGRATE notifications go to the issuing netns, fixing both the init_net listener leak and MOBIKE notifications inside non-init netns. From Maoyi Xie. 2) xfrm: ipcomp: Free destination pages on acomp errors Move the out_free_req label up so that allocated destination pages are released on decompression errors, not only on success. From Herbert Xu. 3) xfrm: Check for underflow in xfrm_state_mtu Reject configurations that cause xfrm_state_mtu() to underflow, preventing a negative TFCPAD value from becoming a memset size that triggers an out-of-bounds write of several terabytes. From David Ahern. 4) xfrm: ah: use skb_to_full_sk in async output callbacks Convert the possibly-incomplete skb->sk to a full socket pointer in async AH callbacks so that a request_sock or timewait_sock never reaches xfrm_output_resume() downstream consumers. From Michael Bommarito. 5) Add and revert: esp: fix page frag reference leak on skb_to_sgvec failure The patch does not fix the issue completely. 6) xfrm: esp: restore combined single-frag length gate Check the aligned post-trailer combined length against a page limit in the fast path, preventing skb_page_frag_refill() from falling back to a page too small for the destination scatterlist. From Jingguo Tan. 7) xfrm: iptfs: reset runtime state when cloning SAs Reinitialise the clone's mode_data runtime objects before publishing it, preventing queued skbs from being freed with list state copied from the original SA when migration fails. From Shaomin Chen. 8) xfrm: move policy_bydst RCU sync from per-netns .exit to .pre_exit Flush policy tables and drain the workqueue in a .pre_exit handler so that cleanup_net() pays one RCU grace period per batch instead of one per namespace, fixing stalls at high CLONE_NEWNET rates. From Usama Arif. 
9) xfrm: input: hold netns during deferred transport reinjection Take a netns reference when queueing deferred transport reinjection work and drop it after the callback completes, keeping the skb->cb net pointer valid until the deferred work runs. From Zhengchuan Liang. * tag 'ipsec-2026-05-29' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec: Revert "esp: fix page frag reference leak on skb_to_sgvec failure" xfrm: input: hold netns during deferred transport reinjection xfrm: move policy_bydst RCU sync from per-netns .exit to .pre_exit xfrm: iptfs: reset runtime state when cloning SAs xfrm: esp: restore combined single-frag length gate esp: fix page frag reference leak on skb_to_sgvec failure xfrm: ah: use skb_to_full_sk in async output callbacks xfrm: Check for underflow in xfrm_state_mtu xfrm: ipcomp: Free destination pages on acomp errors xfrm: route MIGRATE notifications to caller's netns ==================== Link: https://patch.msgid.link/20260529092648.3878973-1-steffen.klassert@secunet.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- include/net/xfrm.h 3
-rw-r--r-- net/ipv4/ah4.c 2
-rw-r--r-- net/ipv4/esp4.c 4
-rw-r--r-- net/ipv6/ah6.c 2
-rw-r--r-- net/ipv6/esp6.c 4
-rw-r--r-- net/key/af_key.c 6
-rw-r--r-- net/xfrm/xfrm_input.c 16
-rw-r--r-- net/xfrm/xfrm_ipcomp.c 12
-rw-r--r-- net/xfrm/xfrm_iptfs.c 28
-rw-r--r-- net/xfrm/xfrm_policy.c 17
-rw-r--r-- net/xfrm/xfrm_state.c 23
-rw-r--r-- net/xfrm/xfrm_user.c 5
12 files changed, 83 insertions, 39 deletions
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 10d3edde6b2f..874409127e29 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -715,6 +715,7 @@ struct xfrm_mgr {
const struct xfrm_migrate *m,
int num_bundles,
const struct xfrm_kmaddress *k,
+ struct net *net,
const struct xfrm_encap_tmpl *encap);
bool (*is_alive)(const struct km_event *c);
};
@@ -1891,7 +1892,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
#ifdef CONFIG_XFRM_MIGRATE
int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap);
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
u32 if_id);
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 4366cbac3f06..6fd642d2278d 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -143,7 +143,7 @@ static void ah_output_done(void *data, int err)
}
kfree(AH_SKB_CB(skb)->tmp);
- xfrm_output_resume(skb->sk, skb, err);
+ xfrm_output_resume(skb_to_full_sk(skb), skb, err);
}
static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 6a5febbdbee4..513c8215c947 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -419,8 +419,8 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
return err;
}
- if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
- ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
+ if (ALIGN(skb->data_len + tailen, L1_CACHE_BYTES) >
+ PAGE_SIZE)
goto cow;
if (!skb_cloned(skb)) {
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index de1e68199a01..76f7a2de9108 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -337,7 +337,7 @@ static void ah6_output_done(void *data, int err)
ah6_restore_hdrs(top_iph, iph_ext, extlen);
kfree(AH_SKB_CB(skb)->tmp);
- xfrm_output_resume(skb->sk, skb, err);
+ xfrm_output_resume(skb_to_full_sk(skb), skb, err);
}
static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9c06c5a1419d..57481e423e59 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -448,8 +448,8 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
return err;
}
- if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
- ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
+ if (ALIGN(skb->data_len + tailen, L1_CACHE_BYTES) >
+ PAGE_SIZE)
goto cow;
if (!skb_cloned(skb)) {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a166a88d8788..9cffeef18cd9 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3564,7 +3564,7 @@ static int set_ipsecrequest(struct sk_buff *skb,
#ifdef CONFIG_NET_KEY_MIGRATE
static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap)
{
int i;
@@ -3669,7 +3669,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* broadcast migrate message to sockets */
- pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net);
+ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, net);
return 0;
@@ -3680,7 +3680,7 @@ err:
#else
static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap)
{
return -ENOPROTOOPT;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index f65291eba1f6..e4c2cd24936d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -797,9 +797,12 @@ static void xfrm_trans_reinject(struct work_struct *work)
spin_unlock_bh(&trans->queue_lock);
local_bh_disable();
- while ((skb = __skb_dequeue(&queue)))
- XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net,
- NULL, skb);
+ while ((skb = __skb_dequeue(&queue))) {
+ struct net *net = XFRM_TRANS_SKB_CB(skb)->net;
+
+ XFRM_TRANS_SKB_CB(skb)->finish(net, NULL, skb);
+ put_net(net);
+ }
local_bh_enable();
}
@@ -808,6 +811,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
struct sk_buff *))
{
struct xfrm_trans_tasklet *trans;
+ struct net *hold_net;
trans = this_cpu_ptr(&xfrm_trans_tasklet);
@@ -816,8 +820,12 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
+ hold_net = maybe_get_net(net);
+ if (!hold_net)
+ return -ENODEV;
+
XFRM_TRANS_SKB_CB(skb)->finish = finish;
- XFRM_TRANS_SKB_CB(skb)->net = net;
+ XFRM_TRANS_SKB_CB(skb)->net = hold_net;
spin_lock_bh(&trans->queue_lock);
__skb_queue_tail(&trans->queue, skb);
spin_unlock_bh(&trans->queue_lock);
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 5f38dff16177..671d48f8c937 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -51,11 +51,15 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen)
struct scatterlist *dsg;
int len, dlen;
- if (unlikely(err))
- goto out_free_req;
+ if (unlikely(!req))
+ return err;
extra = acomp_request_extra(req);
dsg = extra->sg;
+
+ if (unlikely(err))
+ goto out_free_req;
+
dlen = req->dlen;
pskb_trim_unique(skb, 0);
@@ -84,10 +88,10 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen)
skb_shinfo(skb)->nr_frags++;
} while ((dlen -= len));
- for (; dsg; dsg = sg_next(dsg))
+out_free_req:
+ for (; dsg && sg_page(dsg); dsg = sg_next(dsg))
__free_page(sg_page(dsg));
-out_free_req:
acomp_request_free(req);
return err;
}
diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c
index 97bc979e55ba..6c6bbc040517 100644
--- a/net/xfrm/xfrm_iptfs.c
+++ b/net/xfrm/xfrm_iptfs.c
@@ -2650,7 +2650,8 @@ static void __iptfs_init_state(struct xfrm_state *x,
x->props.enc_hdr_len = sizeof(struct ip_iptfs_hdr);
/* Always keep a module reference when x->mode_data is set */
- __module_get(x->mode_cbs->owner);
+ if (x->mode_data != xtfs)
+ __module_get(x->mode_cbs->owner);
x->mode_data = xtfs;
xtfs->x = x;
@@ -2658,22 +2659,39 @@ static void __iptfs_init_state(struct xfrm_state *x,
static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig)
{
+ struct skb_wseq *w_saved = NULL;
struct xfrm_iptfs_data *xtfs;
xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL);
if (!xtfs)
return -ENOMEM;
- xtfs->ra_newskb = NULL;
if (xtfs->cfg.reorder_win_size) {
- xtfs->w_saved = kzalloc_objs(*xtfs->w_saved,
- xtfs->cfg.reorder_win_size);
- if (!xtfs->w_saved) {
+ w_saved = kzalloc_objs(*w_saved, xtfs->cfg.reorder_win_size);
+ if (!w_saved) {
kfree_sensitive(xtfs);
return -ENOMEM;
}
}
+ xtfs->w_saved = w_saved;
+
+ __skb_queue_head_init(&xtfs->queue);
+ xtfs->queue_size = 0;
+ hrtimer_setup(&xtfs->iptfs_timer, iptfs_delay_timer, CLOCK_MONOTONIC,
+ IPTFS_HRTIMER_MODE);
+
+ spin_lock_init(&xtfs->drop_lock);
+ hrtimer_setup(&xtfs->drop_timer, iptfs_drop_timer, CLOCK_MONOTONIC,
+ IPTFS_HRTIMER_MODE);
+ xtfs->w_seq_set = false;
+ xtfs->w_wantseq = 0;
+ xtfs->w_savedlen = 0;
+ xtfs->ra_newskb = NULL;
+ xtfs->ra_wantseq = 0;
+ xtfs->ra_runtlen = 0;
+
+ __module_get(x->mode_cbs->owner);
x->mode_data = xtfs;
xtfs->x = x;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c944327ce66c..dd09d2063da2 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -4276,21 +4276,21 @@ out_byidx:
return -ENOMEM;
}
-static void xfrm_policy_fini(struct net *net)
+static void __net_exit xfrm_net_pre_exit(struct net *net)
{
- struct xfrm_pol_inexact_bin *b, *t;
- unsigned int sz;
- int dir;
-
disable_work_sync(&net->xfrm.policy_hthresh.work);
-
flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
#endif
xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
+}
- synchronize_rcu();
+static void xfrm_policy_fini(struct net *net)
+{
+ struct xfrm_pol_inexact_bin *b, *t;
+ unsigned int sz;
+ int dir;
WARN_ON(!list_empty(&net->xfrm.policy_all));
@@ -4368,6 +4368,7 @@ static void __net_exit xfrm_net_exit(struct net *net)
static struct pernet_operations __net_initdata xfrm_net_ops = {
.init = xfrm_net_init,
+ .pre_exit = xfrm_net_pre_exit,
.exit = xfrm_net_exit,
};
@@ -4703,7 +4704,7 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* Stage 5 - announce */
- km_migrate(sel, dir, type, m, num_migrate, k, encap);
+ km_migrate(sel, dir, type, m, num_migrate, k, net, encap);
xfrm_pol_put(pol);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 686014d39429..589c3b6e4679 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2837,7 +2837,7 @@ EXPORT_SYMBOL(km_policy_expired);
#ifdef CONFIG_XFRM_MIGRATE
int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_migrate,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap)
{
int err = -EINVAL;
@@ -2848,7 +2848,7 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
list_for_each_entry_rcu(km, &xfrm_km_list, list) {
if (km->migrate) {
ret = km->migrate(sel, dir, type, m, num_migrate, k,
- encap);
+ net, encap);
if (!ret)
err = ret;
}
@@ -3114,10 +3114,14 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
const struct xfrm_type *type = READ_ONCE(x->type);
struct crypto_aead *aead;
u32 blksize, net_adj = 0;
+ u32 overhead, payload_mtu;
if (x->km.state != XFRM_STATE_VALID ||
- !type || type->proto != IPPROTO_ESP)
+ !type || type->proto != IPPROTO_ESP) {
+ if (mtu <= x->props.header_len)
+ return 1;
return mtu - x->props.header_len;
+ }
aead = x->data;
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
@@ -3140,8 +3144,17 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
break;
}
- return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
- net_adj) & ~(blksize - 1)) + net_adj - 2;
+ overhead = x->props.header_len + crypto_aead_authsize(aead) + net_adj;
+ if (mtu <= overhead)
+ return 1;
+
+ payload_mtu = mtu - overhead;
+ payload_mtu &= ~(blksize - 1);
+ if (payload_mtu <= 2)
+ return 1;
+
+ return payload_mtu + net_adj - 2;
+
}
EXPORT_SYMBOL_GPL(xfrm_state_mtu);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 38a90e5ee3d9..71a4b7278eba 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -3271,10 +3271,9 @@ out_cancel:
static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_migrate,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap)
{
- struct net *net = &init_net;
struct sk_buff *skb;
int err;
@@ -3292,7 +3291,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
#else
static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_migrate,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap)
{
return -ENOPROTOOPT;