diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-06-16 08:55:47 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-06-16 08:55:48 -0700 |
| commit | ffaf8967735e7271a4a1362840bcfbbf2a7a722b (patch) | |
| tree | 592952659c27f5c1d22cc664a7c7599a3dc47bd4 | |
| parent | e1f544466b2c6c67e70e42118a7fb39cee0b374a (diff) | |
| parent | 5949a7cf11e685dd171e33586c272dfe673310b6 (diff) | |
Merge branch 'tls-reject-the-combination-of-tls-and-sockmap'
Jakub Kicinski says:
====================
tls: reject the combination of TLS and sockmap
There are no known TLS+sockmap users, and the combination has some
known hard-to-solve bugs. Let's reject this configuration, as we
have discussed a number of times.
====================
Link: https://patch.msgid.link/20260614014102.461064-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| -rw-r--r-- | include/linux/skmsg.h | 9 | ||||
| -rw-r--r-- | net/core/skmsg.c | 52 | ||||
| -rw-r--r-- | net/tls/tls_main.c | 19 | ||||
| -rw-r--r-- | net/tls/tls_sw.c | 399 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c | 420 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/test_sockmap_kern.h | 56 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/test_sockmap_ktls.c | 61 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/test_sockmap.c | 250 |
8 files changed, 119 insertions, 1147 deletions
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 19f4f253b4f9..ca0ec9c8608e 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -544,15 +544,6 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs) psock_set_prog(&progs->skb_verdict, NULL); } -int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb); - -static inline bool sk_psock_strp_enabled(struct sk_psock *psock) -{ - if (!psock) - return false; - return !!psock->saved_data_ready; -} - /* for tcp only, sk is locked */ static inline ssize_t sk_psock_msg_inq(struct sock *sk) { diff --git a/net/core/skmsg.c b/net/core/skmsg.c index e1850caf1a71..73ae12f25940 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -7,7 +7,6 @@ #include <net/sock.h> #include <net/tcp.h> -#include <net/tls.h> #include <trace/events/sock.h> static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce) @@ -992,41 +991,6 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) return 0; } -static void sk_psock_tls_verdict_apply(struct sk_buff *skb, - struct sk_psock *from, int verdict) -{ - switch (verdict) { - case __SK_REDIRECT: - sk_psock_skb_redirect(from, skb); - break; - case __SK_PASS: - case __SK_DROP: - default: - break; - } -} - -int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) -{ - struct bpf_prog *prog; - int ret = __SK_PASS; - - rcu_read_lock(); - prog = READ_ONCE(psock->progs.stream_verdict); - if (likely(prog)) { - skb->sk = psock->sk; - skb_dst_drop(skb); - skb_bpf_redirect_clear(skb); - ret = bpf_prog_run_pin_on_cpu(prog, skb); - ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); - skb->sk = NULL; - } - sk_psock_tls_verdict_apply(skb, psock, ret); - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); - static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, int verdict) { @@ -1167,13 +1131,9 @@ static void sk_psock_strp_data_ready(struct 
sock *sk) rcu_read_lock(); psock = sk_psock(sk); if (likely(psock)) { - if (tls_sw_has_ctx_rx(sk)) { - psock->saved_data_ready(sk); - } else { - read_lock_bh(&sk->sk_callback_lock); - strp_data_ready(&psock->strp); - read_unlock_bh(&sk->sk_callback_lock); - } + read_lock_bh(&sk->sk_callback_lock); + strp_data_ready(&psock->strp); + read_unlock_bh(&sk->sk_callback_lock); } rcu_read_unlock(); } @@ -1275,12 +1235,6 @@ static void sk_psock_verdict_data_ready(struct sock *sk) trace_sk_data_ready(sk); rcu_read_lock(); - psock = sk_psock(sk); - if (psock && tls_sw_has_ctx_rx(sk)) { - psock->saved_data_ready(sk); - rcu_read_unlock(); - return; - } sock = READ_ONCE(sk->sk_socket); if (likely(sock)) ops = READ_ONCE(sock->ops); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 13c88a7b8787..8c588cdab733 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -402,7 +402,6 @@ static __poll_t tls_sk_poll(struct file *file, struct socket *sock, struct tls_sw_context_rx *ctx; struct tls_context *tls_ctx; struct sock *sk = sock->sk; - struct sk_psock *psock; __poll_t mask = 0; u8 shutdown; int state; @@ -416,17 +415,12 @@ static __poll_t tls_sk_poll(struct file *file, struct socket *sock, tls_ctx = tls_get_ctx(sk); ctx = tls_sw_ctx_rx(tls_ctx); - psock = sk_psock_get(sk); if ((skb_queue_empty_lockless(&ctx->rx_list) && - !tls_strp_msg_ready(ctx) && - sk_psock_queue_empty(psock)) || + !tls_strp_msg_ready(ctx)) || READ_ONCE(ctx->key_update_pending)) mask &= ~(EPOLLIN | EPOLLRDNORM); - if (psock) - sk_psock_put(sk, psock); - return mask; } @@ -643,6 +637,17 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, int rc = 0; int conf; + /* TLS and sockmap are mutually exclusive. A socket already in a + * sockmap (i.e. with a psock attached) cannot be upgraded to TLS. + * sockmap rejects TLS sockets already (see sk_psock_init()). 
+ */ + rcu_read_lock(); + if (sk_psock(sk)) { + rcu_read_unlock(); + return -EINVAL; + } + rcu_read_unlock(); + if (sockptr_is_null(optval) || (optlen < sizeof(*crypto_info))) return -EINVAL; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5a4300c943a1..9324e4ed20a3 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -614,136 +614,17 @@ static int tls_do_encryption(struct sock *sk, return rc; } -static int tls_split_open_record(struct sock *sk, struct tls_rec *from, - struct tls_rec **to, struct sk_msg *msg_opl, - struct sk_msg *msg_oen, u32 split_point, - u32 tx_overhead_size, u32 *orig_end) -{ - u32 i, j, bytes = 0, apply = msg_opl->apply_bytes; - struct scatterlist *sge, *osge, *nsge; - u32 orig_size = msg_opl->sg.size; - struct scatterlist tmp = { }; - struct sk_msg *msg_npl; - struct tls_rec *new; - int ret; - - new = tls_get_rec(sk); - if (!new) - return -ENOMEM; - ret = sk_msg_alloc(sk, &new->msg_encrypted, msg_opl->sg.size + - tx_overhead_size, 0); - if (ret < 0) { - tls_free_rec(sk, new); - return ret; - } - - *orig_end = msg_opl->sg.end; - i = msg_opl->sg.start; - sge = sk_msg_elem(msg_opl, i); - while (apply && sge->length) { - if (sge->length > apply) { - u32 len = sge->length - apply; - - get_page(sg_page(sge)); - sg_set_page(&tmp, sg_page(sge), len, - sge->offset + apply); - sge->length = apply; - bytes += apply; - apply = 0; - } else { - apply -= sge->length; - bytes += sge->length; - } - - sk_msg_iter_var_next(i); - if (i == msg_opl->sg.end) - break; - sge = sk_msg_elem(msg_opl, i); - } - - msg_opl->sg.end = i; - msg_opl->sg.curr = i; - msg_opl->sg.copybreak = 0; - msg_opl->apply_bytes = 0; - msg_opl->sg.size = bytes; - - msg_npl = &new->msg_plaintext; - msg_npl->apply_bytes = apply; - msg_npl->sg.size = orig_size - bytes; - - j = msg_npl->sg.start; - nsge = sk_msg_elem(msg_npl, j); - if (tmp.length) { - memcpy(nsge, &tmp, sizeof(*nsge)); - sk_msg_iter_var_next(j); - nsge = sk_msg_elem(msg_npl, j); - } - - osge = sk_msg_elem(msg_opl, i); 
- while (osge->length) { - memcpy(nsge, osge, sizeof(*nsge)); - sg_unmark_end(nsge); - sk_msg_iter_var_next(i); - sk_msg_iter_var_next(j); - if (i == *orig_end) - break; - osge = sk_msg_elem(msg_opl, i); - nsge = sk_msg_elem(msg_npl, j); - } - - msg_npl->sg.end = j; - msg_npl->sg.curr = j; - msg_npl->sg.copybreak = 0; - - *to = new; - return 0; -} - -static void tls_merge_open_record(struct sock *sk, struct tls_rec *to, - struct tls_rec *from, u32 orig_end) -{ - struct sk_msg *msg_npl = &from->msg_plaintext; - struct sk_msg *msg_opl = &to->msg_plaintext; - struct scatterlist *osge, *nsge; - u32 i, j; - - i = msg_opl->sg.end; - sk_msg_iter_var_prev(i); - j = msg_npl->sg.start; - - osge = sk_msg_elem(msg_opl, i); - nsge = sk_msg_elem(msg_npl, j); - - if (sg_page(osge) == sg_page(nsge) && - osge->offset + osge->length == nsge->offset) { - osge->length += nsge->length; - put_page(sg_page(nsge)); - } - - msg_opl->sg.end = orig_end; - msg_opl->sg.curr = orig_end; - msg_opl->sg.copybreak = 0; - msg_opl->apply_bytes = msg_opl->sg.size + msg_npl->sg.size; - msg_opl->sg.size += msg_npl->sg.size; - - sk_msg_free(sk, &to->msg_encrypted); - sk_msg_xfer_full(&to->msg_encrypted, &from->msg_encrypted); - - kfree(from); -} - static int tls_push_record(struct sock *sk, int flags, unsigned char record_type) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); - struct tls_rec *rec = ctx->open_rec, *tmp = NULL; - u32 i, split_point, orig_end; + struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_pl, *msg_en; struct aead_request *req; - bool split; int rc; + u32 i; if (!rec) return 0; @@ -751,38 +632,6 @@ static int tls_push_record(struct sock *sk, int flags, msg_pl = &rec->msg_plaintext; msg_en = &rec->msg_encrypted; - split_point = msg_pl->apply_bytes; - split = split_point && split_point < msg_pl->sg.size; - if (unlikely((!split && - msg_pl->sg.size + - prot->overhead_size > 
msg_en->sg.size) || - (split && - split_point + - prot->overhead_size > msg_en->sg.size))) { - split = true; - split_point = msg_en->sg.size; - } - if (split) { - rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en, - split_point, prot->overhead_size, - &orig_end); - if (rc < 0) - return rc; - /* This can happen if above tls_split_open_record allocates - * a single large encryption buffer instead of two smaller - * ones. In this case adjust pointers and continue without - * split. - */ - if (!msg_pl->sg.size) { - tls_merge_open_record(sk, rec, tmp, orig_end); - msg_pl = &rec->msg_plaintext; - msg_en = &rec->msg_encrypted; - split = false; - } - sk_msg_trim(sk, msg_en, msg_pl->sg.size + - prot->overhead_size); - } - rec->tx_flags = flags; req = &rec->aead_req; @@ -840,155 +689,26 @@ static int tls_push_record(struct sock *sk, int flags, rc = tls_do_encryption(sk, tls_ctx, ctx, req, msg_pl->sg.size + prot->tail_size, i); if (rc < 0) { - if (rc != -EINPROGRESS) { + if (rc != -EINPROGRESS) tls_err_abort(sk, -EBADMSG); - if (split) { - tls_ctx->pending_open_record_frags = true; - tls_merge_open_record(sk, rec, tmp, orig_end); - } - } ctx->async_capable = 1; return rc; - } else if (split) { - msg_pl = &tmp->msg_plaintext; - msg_en = &tmp->msg_encrypted; - sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); - tls_ctx->pending_open_record_frags = true; - ctx->open_rec = tmp; } return tls_tx_records(sk, flags); } static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, - bool full_record, u8 record_type, - ssize_t *copied, int flags) + u8 record_type, ssize_t *copied, int flags) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); - struct sk_msg msg_redir = { }; - struct sk_psock *psock; - struct sock *sk_redir; - struct tls_rec *rec; - bool enospc, policy, redir_ingress; - int err = 0, send; - u32 delta = 0; - - policy = !(flags & MSG_SENDPAGE_NOPOLICY); - psock = sk_psock_get(sk); - if 
(!psock || !policy) { - err = tls_push_record(sk, flags, record_type); - if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { - *copied -= sk_msg_free(sk, msg); - tls_free_open_rec(sk); - err = -sk->sk_err; - } - if (psock) - sk_psock_put(sk, psock); - return err; - } -more_data: - enospc = sk_msg_full(msg); - if (psock->eval == __SK_NONE) { - delta = msg->sg.size; - psock->eval = sk_psock_msg_verdict(sk, psock, msg); - delta -= msg->sg.size; - - if ((s32)delta > 0) { - /* It indicates that we executed bpf_msg_pop_data(), - * causing the plaintext data size to decrease. - * Therefore the encrypted data size also needs to - * correspondingly decrease. We only need to subtract - * delta to calculate the new ciphertext length since - * ktls does not support block encryption. - */ - struct sk_msg *enc = &ctx->open_rec->msg_encrypted; + int err; - sk_msg_trim(sk, enc, enc->sg.size - delta); - } - } - if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && - !enospc && !full_record) { - err = -ENOSPC; - goto out_err; - } - msg->cork_bytes = 0; - send = msg->sg.size; - if (msg->apply_bytes && msg->apply_bytes < send) - send = msg->apply_bytes; - - switch (psock->eval) { - case __SK_PASS: - err = tls_push_record(sk, flags, record_type); - if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { - *copied -= sk_msg_free(sk, msg); - tls_free_open_rec(sk); - err = -sk->sk_err; - goto out_err; - } - break; - case __SK_REDIRECT: - redir_ingress = psock->redir_ingress; - sk_redir = psock->sk_redir; - memcpy(&msg_redir, msg, sizeof(*msg)); - if (msg->apply_bytes < send) - msg->apply_bytes = 0; - else - msg->apply_bytes -= send; - sk_msg_return_zero(sk, msg, send); - msg->sg.size -= send; - release_sock(sk); - err = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress, - &msg_redir, send, flags); - lock_sock(sk); - if (err < 0) { - /* Regardless of whether the data represented by - * msg_redir is sent successfully, we have already - * uncharged it via sk_msg_return_zero(). 
The - * msg->sg.size represents the remaining unprocessed - * data, which needs to be uncharged here. - */ - sk_mem_uncharge(sk, msg->sg.size); - *copied -= sk_msg_free_nocharge(sk, &msg_redir); - msg->sg.size = 0; - } - if (msg->sg.size == 0) - tls_free_open_rec(sk); - break; - case __SK_DROP: - default: - sk_msg_free_partial(sk, msg, send); - if (msg->apply_bytes < send) - msg->apply_bytes = 0; - else - msg->apply_bytes -= send; - if (msg->sg.size == 0) - tls_free_open_rec(sk); - *copied -= (send + delta); - err = -EACCES; + err = tls_push_record(sk, flags, record_type); + if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { + *copied -= sk_msg_free(sk, msg); + tls_free_open_rec(sk); + err = -sk->sk_err; } - - if (likely(!err)) { - bool reset_eval = !ctx->open_rec; - - rec = ctx->open_rec; - if (rec) { - msg = &rec->msg_plaintext; - if (!msg->apply_bytes) - reset_eval = true; - } - if (reset_eval) { - psock->eval = __SK_NONE; - if (psock->sk_redir) { - sock_put(psock->sk_redir); - psock->sk_redir = NULL; - } - } - if (rec) - goto more_data; - } - out_err: - sk_psock_put(sk, psock); return err; } @@ -1008,7 +728,7 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags) if (!copied) return 0; - return bpf_exec_tx_verdict(msg_pl, sk, true, TLS_RECORD_TYPE_DATA, + return bpf_exec_tx_verdict(msg_pl, sk, TLS_RECORD_TYPE_DATA, &copied, flags); } @@ -1156,7 +876,7 @@ alloc_encrypted: copied += try_to_copy; sk_msg_sg_copy_set(msg_pl, first); - ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, + ret = bpf_exec_tx_verdict(msg_pl, sk, record_type, &copied, msg->msg_flags); if (ret) { @@ -1164,13 +884,7 @@ alloc_encrypted: num_async++; else if (ret == -ENOMEM) goto wait_for_memory; - else if (ctx->open_rec && ret == -ENOSPC) { - if (msg_pl->cork_bytes) { - ret = 0; - goto send_end; - } - goto rollback_iter; - } else if (ret != -EAGAIN) + else if (ret != -EAGAIN) goto send_end; } @@ -1181,11 +895,6 @@ alloc_encrypted: } continue; -rollback_iter: - copied -= 
try_to_copy; - sk_msg_sg_copy_clear(msg_pl, first); - iov_iter_revert(&msg->msg_iter, - msg_pl->sg.size - orig_size); fallback_to_reg_send: sk_msg_trim(sk, msg_pl, orig_size); } @@ -1221,7 +930,7 @@ fallback_to_reg_send: copied += try_to_copy; copied: if (full_record || eor) { - ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, + ret = bpf_exec_tx_verdict(msg_pl, sk, record_type, &copied, msg->msg_flags); if (ret) { @@ -1229,11 +938,8 @@ copied: num_async++; else if (ret == -ENOMEM) goto wait_for_memory; - else if (ret != -EAGAIN) { - if (ret == -ENOSPC) - ret = 0; + else if (ret != -EAGAIN) goto send_end; - } } /* Transmit if any encryptions have completed */ @@ -1335,8 +1041,8 @@ retry: if (msg_pl->sg.size == 0) goto unlock; - /* Check the BPF advisor and perform transmission. */ - ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, + /* Perform transmission. */ + ret = bpf_exec_tx_verdict(msg_pl, sk, TLS_RECORD_TYPE_DATA, &copied, 0); switch (ret) { case 0: @@ -1372,8 +1078,7 @@ unlock: * consumed via sock_error(). 
*/ static int -tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, - bool released, bool has_copied) +tls_rx_rec_wait(struct sock *sk, bool nonblock, bool released, bool has_copied) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1388,9 +1093,6 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, timeo = sock_rcvtimeo(sk, nonblock); while (!tls_strp_msg_ready(ctx)) { - if (!sk_psock_queue_empty(psock)) - return 0; - if (sk->sk_err) { if (has_copied) return -READ_ONCE(sk->sk_err); @@ -1434,9 +1136,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); ret = sk_wait_event(sk, &timeo, - tls_strp_msg_ready(ctx) || - !sk_psock_queue_empty(psock), - &wait); + tls_strp_msg_ready(ctx), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); @@ -1446,7 +1146,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, } if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) - return tls_rx_rec_wait(sk, psock, nonblock, false, has_copied); + return tls_rx_rec_wait(sk, nonblock, false, has_copied); return 1; } @@ -2096,7 +1796,6 @@ int tls_sw_recvmsg(struct sock *sk, struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; ssize_t decrypted = 0, async_copy_bytes = 0; - struct sk_psock *psock; unsigned char control = 0; size_t flushed_at = 0; struct strp_msg *rxm; @@ -2109,7 +1808,6 @@ int tls_sw_recvmsg(struct sock *sk, bool is_peek = flags & MSG_PEEK; bool rx_more = false; bool released = true; - bool bpf_strp_enabled; bool zc_capable; if (unlikely(flags & MSG_ERRQUEUE)) @@ -2118,8 +1816,6 @@ int tls_sw_recvmsg(struct sock *sk, err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); if (err < 0) return err; - psock = sk_psock_get(sk); - bpf_strp_enabled = 
sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */ err = ctx->async_wait.err; @@ -2140,27 +1836,16 @@ int tls_sw_recvmsg(struct sock *sk, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); len = len - copied; - zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek && - ctx->zc_capable; + zc_capable = !is_kvec && !is_peek && ctx->zc_capable; decrypted = 0; while (len && (decrypted + copied < target || tls_strp_msg_ready(ctx))) { struct tls_decrypt_arg darg; int to_decrypt, chunk; - err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT, + err = tls_rx_rec_wait(sk, flags & MSG_DONTWAIT, released, !!(decrypted + copied)); - if (err <= 0) { - if (psock) { - chunk = sk_msg_recvmsg(sk, psock, msg, len, - flags); - if (chunk > 0) { - decrypted += chunk; - len -= chunk; - continue; - } - } + if (err <= 0) goto recv_end; - } memset(&darg.inargs, 0, sizeof(darg.inargs)); @@ -2174,7 +1859,7 @@ int tls_sw_recvmsg(struct sock *sk, darg.zc = true; /* Do not use async mode if record is non-data */ - if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) + if (tlm->control == TLS_RECORD_TYPE_DATA) darg.async = ctx->async_capable; else darg.async = false; @@ -2230,18 +1915,6 @@ put_on_rx_list: continue; } - if (bpf_strp_enabled) { - released = true; - err = sk_psock_tls_strp_read(psock, skb); - if (err != __SK_PASS) { - rxm->offset = rxm->offset + rxm->full_len; - rxm->full_len = 0; - if (err == __SK_DROP) - consume_skb(skb); - continue; - } - } - if (partially_consumed) chunk = len; @@ -2304,8 +1977,6 @@ recv_end: end: tls_rx_reader_unlock(sk, ctx); - if (psock) - sk_psock_put(sk, psock); return copied ? 
: err; } @@ -2332,7 +2003,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, } else { struct tls_decrypt_arg darg; - err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK, + err = tls_rx_rec_wait(sk, flags & SPLICE_F_NONBLOCK, true, false); if (err <= 0) goto splice_read_end; @@ -2417,7 +2088,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, } else { struct tls_decrypt_arg darg; - err = tls_rx_rec_wait(sk, NULL, true, released, !!copied); + err = tls_rx_rec_wait(sk, true, released, !!copied); if (err <= 0) goto read_sock_end; @@ -2473,16 +2144,8 @@ bool tls_sw_sock_is_readable(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - bool ingress_empty = true; - struct sk_psock *psock; - rcu_read_lock(); - psock = sk_psock(sk); - if (psock) - ingress_empty = list_empty(&psock->ingress_msg); - rcu_read_unlock(); - - return !ingress_empty || tls_strp_msg_ready(ctx) || + return tls_strp_msg_ready(ctx) || !skb_queue_empty(&ctx->rx_list); } @@ -2565,7 +2228,6 @@ static void tls_data_ready(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - struct sk_psock *psock; gfp_t alloc_save; trace_sk_data_ready(sk); @@ -2574,13 +2236,6 @@ static void tls_data_ready(struct sock *sk) sk->sk_allocation = GFP_ATOMIC; tls_strp_data_ready(&ctx->strp); sk->sk_allocation = alloc_save; - - psock = sk_psock_get(sk); - if (psock) { - if (!list_empty(&psock->ingress_msg)) - ctx->saved_data_ready(sk); - sk_psock_put(sk, psock); - } } void tls_sw_cancel_work_tx(struct tls_context *tls_ctx) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 6ed8e149e3d5..34737e8df6ea 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -9,7 +9,6 @@ #include "test_progs.h" #include 
"sockmap_helpers.h" #include "test_skmsg_load_helpers.skel.h" -#include "test_sockmap_ktls.skel.h" #define MAX_TEST_NAME 80 #define TCP_ULP 31 @@ -117,6 +116,68 @@ close: close(s); } +static void test_sockmap_ktls_enable_fails_when_in_sockmap(int family, int map) +{ + struct tls12_crypto_info_aes_gcm_128 crypto = { + .info = { + .version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, + }, + }; + struct sockaddr_storage addr = {}; + socklen_t len = sizeof(addr); + struct sockaddr_in6 *v6; + struct sockaddr_in *v4; + int err, s, zero = 0; + + switch (family) { + case AF_INET: + v4 = (struct sockaddr_in *)&addr; + v4->sin_family = AF_INET; + break; + case AF_INET6: + v6 = (struct sockaddr_in6 *)&addr; + v6->sin6_family = AF_INET6; + break; + default: + PRINT_FAIL("unsupported socket family %d", family); + return; + } + + s = socket(family, SOCK_STREAM, 0); + if (!ASSERT_GE(s, 0, "socket")) + return; + + err = bind(s, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "bind")) + goto close; + + err = getsockname(s, (struct sockaddr *)&addr, &len); + if (!ASSERT_OK(err, "getsockname")) + goto close; + + err = connect(s, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "connect")) + goto close; + + /* Add the socket to the sockmap, attaching a psock. */ + err = bpf_map_update_elem(map, &zero, &s, BPF_ANY); + if (!ASSERT_OK(err, "sockmap update elem")) + goto close; + + /* Installing the TLS ULP is allowed, it does not touch the datapath. */ + err = setsockopt(s, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto close; + + /* Enabling the TLS crypto datapath must be rejected. 
*/ + err = setsockopt(s, SOL_TLS, TLS_TX, &crypto, sizeof(crypto)); + ASSERT_ERR(err, "setsockopt(TLS_TX)"); + +close: + close(s); +} + static const char *fmt_test_name(const char *subtest_name, int family, enum bpf_map_type map_type) { @@ -160,249 +221,6 @@ out: close(p); } -static void test_sockmap_ktls_tx_cork(int family, int sotype, bool push) -{ - int err, off; - int i, j; - int start_push = 0, push_len = 0; - int c = 0, p = 0, one = 1, sent, recvd; - int prog_fd, map_fd; - char msg[12] = "hello world\0"; - char rcv[20] = {0}; - struct test_sockmap_ktls *skel; - - skel = test_sockmap_ktls__open_and_load(); - if (!ASSERT_TRUE(skel, "open ktls skel")) - return; - - err = create_pair(family, sotype, &c, &p); - if (!ASSERT_OK(err, "create_pair()")) - goto out; - - prog_fd = bpf_program__fd(skel->progs.prog_sk_policy); - map_fd = bpf_map__fd(skel->maps.sock_map); - - err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); - if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) - goto out; - - err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); - if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) - goto out; - - err = init_ktls_pairs(c, p); - if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) - goto out; - - skel->bss->cork_byte = sizeof(msg); - if (push) { - start_push = 1; - push_len = 2; - } - skel->bss->push_start = start_push; - skel->bss->push_end = push_len; - - off = sizeof(msg) / 2; - sent = send(c, msg, off, 0); - if (!ASSERT_EQ(sent, off, "send(msg)")) - goto out; - - recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); - if (!ASSERT_EQ(-1, recvd, "expected no data")) - goto out; - - /* send remaining msg */ - sent = send(c, msg + off, sizeof(msg) - off, 0); - if (!ASSERT_EQ(sent, sizeof(msg) - off, "send remaining data")) - goto out; - - recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); - if (!ASSERT_OK(err, "recv(msg)") || - !ASSERT_EQ(recvd, sizeof(msg) + push_len, "check length mismatch")) - goto out; - - for (i = 0, j = 0; i < recvd;) { 
- /* skip checking the data that has been pushed in */ - if (i >= start_push && i <= start_push + push_len - 1) { - i++; - continue; - } - if (!ASSERT_EQ(rcv[i], msg[j], "data mismatch")) - goto out; - i++; - j++; - } -out: - if (c) - close(c); - if (p) - close(p); - test_sockmap_ktls__destroy(skel); -} - -static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push) -{ - int c = -1, p = -1, one = 1, two = 2; - struct test_sockmap_ktls *skel; - unsigned char *data = NULL; - struct msghdr msg = {0}; - struct iovec iov[2]; - int prog_fd, map_fd; - int txrx_buf = 1024; - int iov_length = 8192; - int err; - - skel = test_sockmap_ktls__open_and_load(); - if (!ASSERT_TRUE(skel, "open ktls skel")) - return; - - err = create_pair(family, sotype, &c, &p); - if (!ASSERT_OK(err, "create_pair()")) - goto out; - - err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int)); - err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int)); - if (!ASSERT_OK(err, "set buf limit")) - goto out; - - prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir); - map_fd = bpf_map__fd(skel->maps.sock_map); - - err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); - if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) - goto out; - - err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); - if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) - goto out; - - err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST); - if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) - goto out; - - skel->bss->apply_bytes = 1024; - - err = init_ktls_pairs(c, p); - if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) - goto out; - - data = calloc(iov_length, sizeof(char)); - if (!data) - goto out; - - iov[0].iov_base = data; - iov[0].iov_len = iov_length; - iov[1].iov_base = data; - iov[1].iov_len = iov_length; - msg.msg_iov = iov; - msg.msg_iovlen = 2; - - for (;;) { - err = sendmsg(c, &msg, MSG_DONTWAIT); - if (err <= 0) - break; - } - -out: - if (data) - free(data); - if (c != 
-1) - close(c); - if (p != -1) - close(p); - - test_sockmap_ktls__destroy(skel); -} - -static void test_sockmap_ktls_tx_pop(int family, int sotype) -{ - char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0"; - int c = 0, p = 0, one = 1, sent, recvd; - struct test_sockmap_ktls *skel; - int prog_fd, map_fd; - char rcv[50] = {0}; - int err; - int i, m, r; - - skel = test_sockmap_ktls__open_and_load(); - if (!ASSERT_TRUE(skel, "open ktls skel")) - return; - - err = create_pair(family, sotype, &c, &p); - if (!ASSERT_OK(err, "create_pair()")) - goto out; - - prog_fd = bpf_program__fd(skel->progs.prog_sk_policy); - map_fd = bpf_map__fd(skel->maps.sock_map); - - err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); - if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) - goto out; - - err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); - if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) - goto out; - - err = init_ktls_pairs(c, p); - if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) - goto out; - - struct { - int pop_start; - int pop_len; - } pop_policy[] = { - /* trim the start */ - {0, 2}, - {0, 10}, - {1, 2}, - {1, 10}, - /* trim the end */ - {35, 2}, - /* New entries should be added before this line */ - {-1, -1}, - }; - - i = 0; - while (pop_policy[i].pop_start >= 0) { - skel->bss->pop_start = pop_policy[i].pop_start; - skel->bss->pop_end = pop_policy[i].pop_len; - - sent = send(c, msg, sizeof(msg), 0); - if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)")) - goto out; - - recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); - if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch")) - goto out; - - /* verify the data |
