From 4a1b1ac2744694a2ecd66a84bdb1445f4ef24bee Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 26 May 2026 12:40:25 -0300 Subject: RDMA/core: Validate the passed in fops for ib_get_ucaps() Sashiko pointed out it is not safe to rely only on the devt because char/block alias so if the user finds a block device with the same dev_t it can masquerade as a ucap cdev fd. Test the f_ops to only accept authentic cdevs. Link: https://patch.msgid.link/r/0-v1-fd9482545e37+1e25-ib_ucaps_fd_ops_jgg@nvidia.com Cc: stable@vger.kernel.org Fixes: 61e51682816d ("RDMA/uverbs: Introduce UCAP (User CAPabilities) API") Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucaps.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/ucaps.c b/drivers/infiniband/core/ucaps.c index 948093260dbd..5155ff0e538e 100644 --- a/drivers/infiniband/core/ucaps.c +++ b/drivers/infiniband/core/ucaps.c @@ -82,14 +82,12 @@ static int get_ucap_from_devt(dev_t devt, u64 *idx_mask) static int get_devt_from_fd(unsigned int fd, dev_t *ret_dev) { - struct file *file; + CLASS(fd, f)(fd); - file = fget(fd); - if (!file) + if (fd_empty(f) || fd_file(f)->f_op != &ucaps_cdev_fops) return -EBADF; - *ret_dev = file_inode(file)->i_rdev; - fput(file); + *ret_dev = file_inode(fd_file(f))->i_rdev; return 0; } -- cgit v1.2.3 From b230b57bd6f242aaac3b8e62c7d18c69e1e30392 Mon Sep 17 00:00:00 2001 From: Yonatan Nachum Date: Tue, 26 May 2026 08:15:36 +0000 Subject: RDMA/efa: Validate SQ ring size against max LLQ size Validate the SQ ring size against the device's max LLQ size. This ensures that when using 128-byte WQEs, userspace cannot exceed the queue limits. On create QP, userspace provides the SQ ring size (depth x WQE size) which is validated against the max LLQ size. Fixes: 40909f664d27 ("RDMA/efa: Add EFA verbs implementation") Link: https://patch.msgid.link/r/20260526081536.1203553-1-ynachum@amazon.com Reviewed-by: Michael Margolin Signed-off-by: Yonatan Nachum Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_verbs.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 7bd0838ebc99..9b2b652800e4 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -613,7 +613,8 @@ err_remove_mmap: } static int efa_qp_validate_cap(struct efa_dev *dev, - struct ib_qp_init_attr *init_attr) + struct ib_qp_init_attr *init_attr, + u32 sq_ring_size) { if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) { ibdev_dbg(&dev->ibdev, @@ -622,6 +623,14 @@ static int efa_qp_validate_cap(struct efa_dev *dev, dev->dev_attr.max_sq_depth); return -EINVAL; } + + if (sq_ring_size > dev->dev_attr.max_llq_size) { + ibdev_dbg(&dev->ibdev, + "qp: requested sq ring size[%u] exceeds the max[%u]\n", + sq_ring_size, dev->dev_attr.max_llq_size); + return -EINVAL; + } + if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) { ibdev_dbg(&dev->ibdev, "qp: requested receive wr[%u] exceeds the max[%u]\n", @@ -691,14 +700,6 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext, ibucontext); - err = efa_qp_validate_cap(dev, init_attr); - if (err) - goto err_out; - - err = efa_qp_validate_attr(dev, init_attr); - if (err) - goto err_out; - err = ib_copy_validate_udata_in_cm(udata, cmd, driver_qp_type, 0); if (err) goto err_out; @@ -720,6 +721,14 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, goto err_out; } + err = efa_qp_validate_cap(dev, init_attr, cmd.sq_ring_size); + if (err) + goto err_out; + + err = efa_qp_validate_attr(dev, init_attr); + if (err) + goto err_out; + create_qp_params.uarn = ucontext->uarn; create_qp_params.pd = to_epd(ibqp->pd)->pdn; -- cgit v1.2.3 From 15fe76e23615f502d051ef0768f86babaf08746c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 1 Jun 2026 13:52:31 -0300 Subject: RDMA/umem: Fix truncation for block sizes >= 4G When the iommu is used the linearization of the mapping can give a single block that is very large split across multiple SG entries. When __rdma_block_iter_next() reassembles the split SG entries it is overflowing the 32 bit stack values and computed the wrong DMA addresses for blocks after the truncation. Use the right types to hold DMA addresses. Link: https://patch.msgid.link/r/1-v1-88303e9e509f+f7-ib_umem_types_jgg@nvidia.com Cc: stable@vger.kernel.org Fixes: a808273a495c ("RDMA/verbs: Add a DMA iterator to return aligned contiguous memory blocks") Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/iter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/iter.c b/drivers/infiniband/core/iter.c index 8e543d100657..3ed351e8fcf6 100644 --- a/drivers/infiniband/core/iter.c +++ b/drivers/infiniband/core/iter.c @@ -19,8 +19,8 @@ EXPORT_SYMBOL(__rdma_block_iter_start); bool __rdma_block_iter_next(struct ib_block_iter *biter) { - unsigned int block_offset; - unsigned int delta; + dma_addr_t block_offset; + dma_addr_t delta; if (!biter->__sg_nents || !biter->__sg) return false; -- cgit v1.2.3 From 323c98a4ff06aa28114f2bf658fb43eb3b536bbc Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 25 May 2026 17:21:36 +0300 Subject: RDMA/core: Validate cpu_id against nr_cpu_ids in DMAH alloc The cpu_id attribute supplied by user space through UVERBS_ATTR_ALLOC_DMAH_CPU_ID is passed directly to cpumask_test_cpu() without first verifying that the value is within the valid CPU range. Passing such untrusted data to cpumask_test_cpu() may lead to an out-of-bounds read of the underlying cpumask bitmap: the helper expands to a test_bit() that indexes the bitmap by cpu_id / BITS_PER_LONG with no bound check. In addition, on kernels built with CONFIG_DEBUG_PER_CPU_MAPS it trips the WARN_ON_ONCE() in cpumask_check(); combined with panic_on_warn this turns a bad user input into a machine reboot. Reject any cpu_id that is not smaller than nr_cpu_ids with -EINVAL before it is used. Reported by Smatch. Fixes: d83edab562a4 ("RDMA/core: Introduce a DMAH object and its alloc/free APIs") Link: https://patch.msgid.link/r/20260525142136.28165-1-yishaih@nvidia.com Cc: stable@vger.kernel.org Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/ag68qoAW3P04J7pT@stanley.mountain/ Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_dmah.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs_std_types_dmah.c b/drivers/infiniband/core/uverbs_std_types_dmah.c index 453ce656c6f2..97101e093826 100644 --- a/drivers/infiniband/core/uverbs_std_types_dmah.c +++ b/drivers/infiniband/core/uverbs_std_types_dmah.c @@ -47,6 +47,11 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DMAH_ALLOC)( if (ret) goto err; + if (dmah->cpu_id >= nr_cpu_ids) { + ret = -EINVAL; + goto err; + } + if (!cpumask_test_cpu(dmah->cpu_id, current->cpus_ptr)) { ret = -EPERM; goto err; -- cgit v1.2.3 From badad6fad60def1b9805559dd81dbab3d97b82aa Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 4 Jun 2026 15:03:13 -0300 Subject: RDMA: During rereg_mr ensure that REREG_ACCESS is compatible If IB_MR_REREG_ACCESS changes from RO to RW then the umem has to be re-evaluated to ensure it is properly pinned as RW. Since the umem is hidden inside each driver's mr struct add a ib_umem_check_rereg() function that each driver has to call before processing IB_MR_REREG_ACCESS. mlx4 has to retain its duplicate ib_access_writable check because it implements IB_MR_REREG_ACCESS | IB_MR_REREG_TRANS by changing both items in place sequentially while the MR is live, so it will continue to not support this combination. Cc: stable@vger.kernel.org Fixes: b40656aa7d55 ("RDMA/umem: remove FOLL_FORCE usage") Link: https://patch.msgid.link/r/0-v1-06fb1a2d6cf5+107-rereg_access_jgg@nvidia.com Reported-by: Philip Tsukerman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 16 ++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++++ drivers/infiniband/hw/irdma/verbs.c | 4 ++++ drivers/infiniband/hw/mlx4/mr.c | 4 ++++ drivers/infiniband/hw/mlx5/mr.c | 4 ++++ drivers/infiniband/sw/rxe/rxe_verbs.c | 5 +++++ 6 files changed, 37 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 786fa1aa8e55..4b055712b0d0 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -332,3 +332,19 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, return 0; } EXPORT_SYMBOL(ib_umem_copy_from); + +/* + * Called during rereg mr if the driver is able to re-use a umem for + * IB_MR_REREG_ACCESS. + */ +int ib_umem_check_rereg(struct ib_umem *umem, int flags, int new_access_flags) +{ + if (!umem) + return 0; + + if ((flags & IB_MR_REREG_ACCESS) && !(flags & IB_MR_REREG_TRANS)) + if (ib_access_writable(new_access_flags) && !umem->writable) + return -EACCES; + return 0; +} +EXPORT_SYMBOL(ib_umem_check_rereg); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 896af1828a38..25bfd3970f5b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -300,6 +300,10 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, goto err_out; } + ret = ib_umem_check_rereg(mr->pbl_mtr.umem, flags, mr_access_flags); + if (ret) + goto err_out; + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); ret = PTR_ERR_OR_ZERO(mailbox); if (ret) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 17086048d2d7..8cd427532805 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3803,6 +3803,10 @@ static struct ib_mr *irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags, if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) return ERR_PTR(-EOPNOTSUPP); + ret = ib_umem_check_rereg(iwmr->region, flags, new_access); + if (ret) + return ERR_PTR(ret); + if (dmabuf_revocable) { umem_dmabuf = to_ib_umem_dmabuf(iwmr->region); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 650b4a9121ff..6747bca30677 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -209,6 +209,10 @@ struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, struct mlx4_mpt_entry **pmpt_entry = &mpt_entry; int err; + err = ib_umem_check_rereg(mmr->umem, flags, mr_access_flags); + if (err) + return ERR_PTR(err); + /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs, * we assume that the calls can't run concurrently. Otherwise, a * race exists. diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3b6da45061a5..fb40b44496f4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1179,6 +1179,10 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) return ERR_PTR(-EOPNOTSUPP); + err = ib_umem_check_rereg(mr->umem, flags, new_access_flags); + if (err) + return ERR_PTR(err); + if (!(flags & IB_MR_REREG_ACCESS)) new_access_flags = mr->access_flags; if (!(flags & IB_MR_REREG_PD)) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4d4891dc2884..4cf04a44189c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1319,6 +1319,7 @@ static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags, struct rxe_mr *mr = to_rmr(ibmr); struct rxe_pd *old_pd = to_rpd(ibmr->pd); struct rxe_pd *pd = to_rpd(ibpd); + int err; /* for now only support the two easy cases: * rereg_pd and rereg_access @@ -1328,6 +1329,10 @@ static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags, return ERR_PTR(-EOPNOTSUPP); } + err = ib_umem_check_rereg(mr->umem, flags, access); + if (err) + return ERR_PTR(err); + if (flags & IB_MR_REREG_PD) { rxe_put(old_pd); rxe_get(pd); -- cgit v1.2.3 From 29e7b925ae6df64894e82ab6419994dc25580a8a Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 2 Jun 2026 15:46:42 -0400 Subject: IB/isert: Reject login PDUs shorter than ISER_HEADERS_LEN In drivers/infiniband/ulp/isert/ib_isert.c, isert_login_recv_done() computes the login request payload length as wc->byte_len minus ISER_HEADERS_LEN with no lower bound, and login_req_len is a signed int. A remote iSER initiator can post a login Send work request carrying fewer than ISER_HEADERS_LEN (76) bytes, so the subtraction underflows and login_req_len becomes negative. isert_rx_login_req() then reads that negative length back into a signed int, takes size = min(rx_buflen, MAX_KEY_VALUE_PAIRS), and because the min() is signed it keeps the negative value; the value is then passed as the memcpy() length and sign-extended to a multi-gigabyte size_t. The copy into the 8192-byte login->req_buf runs far out of bounds and faults, crashing the target node. The login phase precedes iSCSI authentication, so no credentials are required to reach this path. Reject any login PDU shorter than ISER_HEADERS_LEN before the subtraction, mirroring the existing early return on a failed work completion, so login_req_len can never go negative. The upper bound was already safe: a posted login buffer cannot deliver more than ISER_RX_PAYLOAD_SIZE, so the difference stays at or below MAX_KEY_VALUE_PAIRS and the existing min() clamps it; only the missing lower bound needs to be added. Fixes: b8d26b3be8b3 ("iser-target: Add iSCSI Extensions for RDMA (iSER) target driver") Link: https://patch.msgid.link/r/20260602194642.2273217-1-michael.bommarito@gmail.com Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-8 Signed-off-by: Michael Bommarito Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 348005e71891..1015a51f750a 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1383,6 +1383,12 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc) ib_dma_sync_single_for_cpu(ib_dev, isert_conn->login_desc->dma_addr, ISER_RX_SIZE, DMA_FROM_DEVICE); + if (unlikely(wc->byte_len < ISER_HEADERS_LEN)) { + isert_dbg("login request length %u is too short\n", + wc->byte_len); + return; + } + isert_conn->login_req_len = wc->byte_len - ISER_HEADERS_LEN; if (isert_conn->conn) { -- cgit v1.2.3 From 13e91fd076306f5d0cdfa14f53d69e37274723c4 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 2 Jun 2026 18:04:57 -0400 Subject: RDMA/srp: bound SRP_RSP sense copy by the received length srp_process_rsp() copies sense data from rsp->data + resp_data_len, where resp_data_len is the full 32-bit value supplied by the SRP target and is never checked against the number of bytes actually received (wc->byte_len). The copy length is bounded to SCSI_SENSE_BUFFERSIZE, so at most 96 bytes are copied, but the source offset is not bounded. A malicious or compromised SRP target on the InfiniBand/RoCE fabric that the initiator has logged into can return an SRP_RSP with SRP_RSP_FLAG_SNSVALID set and a large resp_data_len. The receive buffer is allocated at the target-chosen max_ti_iu_len, so the source of the sense copy lands past the bytes actually received; with resp_data_len near 0xFFFFFFFF it is gigabytes past the buffer and the read faults. Copy the sense data only if it has not been truncated, that is, only if the response header, the response data, and the sense region fit within the bytes actually received; otherwise drop the sense and log. The in-tree iSER and NVMe-RDMA receive paths already bound their parse by wc->byte_len; this brings ib_srp into line with them. Fixes: aef9ec39c47f ("IB: Add SCSI RDMA Protocol (SRP) initiator") Link: https://patch.msgid.link/r/20260602220457.2542840-1-michael.bommarito@gmail.com Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-8 Signed-off-by: Michael Bommarito Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index b58868e1cf11..acbd787de265 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1932,7 +1932,8 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) return ib_post_recv(ch->qp, &wr, NULL); } -static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) +static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp, + u32 byte_len) { struct srp_target_port *target = ch->target; struct srp_request *req; @@ -1973,10 +1974,27 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) scmnd->result = rsp->status; if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { - memcpy(scmnd->sense_buffer, rsp->data + - be32_to_cpu(rsp->resp_data_len), - min_t(int, be32_to_cpu(rsp->sense_data_len), - SCSI_SENSE_BUFFERSIZE)); + u32 resp_len = be32_to_cpu(rsp->resp_data_len); + u32 sense_len = be32_to_cpu(rsp->sense_data_len); + + /* + * The sense data starts resp_data_len bytes past the + * response data area; both lengths come from the + * target-controlled response. Copy the sense data + * only if it has not been truncated, that is, only if + * the full sense region fits within the bytes actually + * received. Otherwise the copy source would run past + * the receive buffer (sized to the target-chosen + * max_ti_iu_len), reading out of bounds. + */ + if (sizeof(*rsp) + (u64)resp_len + sense_len <= byte_len) + memcpy(scmnd->sense_buffer, + rsp->data + resp_len, + min(sense_len, SCSI_SENSE_BUFFERSIZE)); + else + shost_printk(KERN_ERR, target->scsi_host, + "dropping truncated sense data (resp_data_len %u sense_data_len %u, %u bytes received)\n", + resp_len, sense_len, byte_len); } if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) @@ -2086,7 +2104,7 @@ static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (opcode) { case SRP_RSP: - srp_process_rsp(ch, iu->buf); + srp_process_rsp(ch, iu->buf, wc->byte_len); break; case SRP_CRED_REQ: -- cgit v1.2.3