diff options
| author | Trond Myklebust <trond.myklebust@hammerspace.com> | 2018-06-04 18:57:13 -0400 |
|---|---|---|
| committer | Trond Myklebust <trond.myklebust@hammerspace.com> | 2018-06-04 18:57:13 -0400 |
| commit | fcda3d5d221bbfc469415b0fa7dc4eb87d90d955 (patch) | |
| tree | a428ad27d04527a1a17973262d00b35f037eecc3 | |
| parent | 3f0b3cf46e0542ac4b4241c579b944b755d11b67 (diff) | |
| parent | 11d0ac16b02eab8cda32efcb51bfab452dab760b (diff) | |
Merge tag 'nfs-rdma-for-4.18-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
NFS-over-RDMA client updates for Linux 4.18
Stable patches:
- xprtrdma: Return -ENOBUFS when no pages are available
New features:
- Add ->alloc_slot() and ->free_slot() functions
Bugfixes and cleanups:
- Add missing SPDX tags to some files
- Try to fail mount quickly if client has no RDMA devices
- Create transport IDs in the correct network namespace
- Fix max_send_wr computation
- Clean up receive tracepoints
- Refactor receive handling
- Remove unused functions
| -rw-r--r-- | include/linux/sunrpc/rpc_rdma.h | 1 | ||||
| -rw-r--r-- | include/linux/sunrpc/xprt.h | 6 | ||||
| -rw-r--r-- | include/linux/sunrpc/xprtrdma.h | 1 | ||||
| -rw-r--r-- | include/trace/events/rpcrdma.h | 76 | ||||
| -rw-r--r-- | net/sunrpc/clnt.c | 1 | ||||
| -rw-r--r-- | net/sunrpc/xprt.c | 17 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/backchannel.c | 105 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/fmr_ops.c | 23 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/frwr_ops.c | 31 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/module.c | 1 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 66 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 1 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/transport.c | 64 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 291 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h | 26 | ||||
| -rw-r--r-- | net/sunrpc/xprtsock.c | 4 |
16 files changed, 359 insertions, 355 deletions
diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 8f144db73e38..92d182fd8e3b 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2015-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 5fea0fb420df..336fd1a19cca 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -84,7 +84,6 @@ struct rpc_rqst { void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; - void *rq_xprtdata; /* Per-xprt private data */ void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; void *rq_rbuffer; /* Reply XDR decode buffer */ @@ -127,6 +126,8 @@ struct rpc_xprt_ops { int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*free_slot)(struct rpc_xprt *xprt, + struct rpc_rqst *req); void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); @@ -324,10 +325,13 @@ struct xprt_class { struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); +void xprt_request_init(struct rpc_task *task); void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); +void xprt_free_slot(struct rpc_xprt *xprt, + struct rpc_rqst *req); void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 5859563e3c1f..86fc38ff0355 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 50ed3f8bf534..c4494a2b3ecd 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -528,24 +528,54 @@ TRACE_EVENT(xprtrdma_post_send, TRACE_EVENT(xprtrdma_post_recv, TP_PROTO( - const struct rpcrdma_rep *rep, + const struct ib_cqe *cqe + ), + + TP_ARGS(cqe), + + TP_STRUCT__entry( + __field(const void *, cqe) + ), + + TP_fast_assign( + __entry->cqe = cqe; + ), + + TP_printk("cqe=%p", + __entry->cqe + ) +); + +TRACE_EVENT(xprtrdma_post_recvs, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + unsigned int count, int status ), - TP_ARGS(rep, status), + TP_ARGS(r_xprt, count, status), TP_STRUCT__entry( - __field(const void *, rep) + __field(const void *, r_xprt) + __field(unsigned int, count) __field(int, status) + __field(int, posted) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( - __entry->rep = rep; + __entry->r_xprt = r_xprt; + __entry->count = count; __entry->status = status; + __entry->posted = r_xprt->rx_buf.rb_posted_receives; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("rep=%p status=%d", - __entry->rep, __entry->status + TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->count, __entry->posted, __entry->status ) ); @@ -584,28 +614,32 @@ TRACE_EVENT(xprtrdma_wc_send, TRACE_EVENT(xprtrdma_wc_receive, TP_PROTO( - const struct rpcrdma_rep *rep, const struct ib_wc *wc ), - TP_ARGS(rep, wc), + TP_ARGS(wc), TP_STRUCT__entry( - __field(const void *, rep) - __field(unsigned int, byte_len) + __field(const void *, cqe) + __field(u32, byte_len) __field(unsigned int, status) - __field(unsigned int, vendor_err) + __field(u32, vendor_err) ), TP_fast_assign( - __entry->rep = rep; - __entry->byte_len = wc->byte_len; + __entry->cqe = wc->wr_cqe; __entry->status = wc->status; - __entry->vendor_err = __entry->status ? wc->vendor_err : 0; + if (wc->status) { + __entry->byte_len = 0; + __entry->vendor_err = wc->vendor_err; + } else { + __entry->byte_len = wc->byte_len; + __entry->vendor_err = 0; + } ), - TP_printk("rep=%p, %u bytes: %s (%u/0x%x)", - __entry->rep, __entry->byte_len, + TP_printk("cqe=%p %u bytes: %s (%u/0x%x)", + __entry->cqe, __entry->byte_len, rdma_show_wc_status(__entry->status), __entry->status, __entry->vendor_err ) @@ -616,6 +650,7 @@ DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li); DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); DEFINE_MR_EVENT(xprtrdma_localinv); +DEFINE_MR_EVENT(xprtrdma_dma_map); DEFINE_MR_EVENT(xprtrdma_dma_unmap); DEFINE_MR_EVENT(xprtrdma_remoteinv); DEFINE_MR_EVENT(xprtrdma_recover_mr); @@ -799,7 +834,6 @@ TRACE_EVENT(xprtrdma_allocate, __field(unsigned int, task_id) __field(unsigned int, client_id) __field(const void *, req) - __field(const void *, rep) __field(size_t, callsize) __field(size_t, rcvsize) ), @@ -808,15 +842,13 @@ TRACE_EVENT(xprtrdma_allocate, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __entry->req = req; - __entry->rep = req ? req->rl_reply : NULL; __entry->callsize = task->tk_rqstp->rq_callsize; __entry->rcvsize = task->tk_rqstp->rq_rcvsize; ), - TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)", + TP_printk("task:%u@%u req=%p (%zu, %zu)", __entry->task_id, __entry->client_id, - __entry->req, __entry->rep, - __entry->callsize, __entry->rcvsize + __entry->req, __entry->callsize, __entry->rcvsize ) ); @@ -848,8 +880,6 @@ TRACE_EVENT(xprtrdma_rpc_done, ) ); -DEFINE_RXPRT_EVENT(xprtrdma_noreps); - /** ** Callback events **/ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c2266f387213..d839c33ae7d9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1546,6 +1546,7 @@ call_reserveresult(struct rpc_task *task) task->tk_status = 0; if (status >= 0) { if (task->tk_rqstp) { + xprt_request_init(task); task->tk_action = call_refresh; return; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 70f005044f06..3c85af058227 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -66,7 +66,7 @@ * Local functions */ static void xprt_init(struct rpc_xprt *xprt, struct net *net); -static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); +static __be32 xprt_alloc_xid(struct rpc_xprt *xprt); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *); @@ -987,6 +987,8 @@ bool xprt_prepare_transmit(struct rpc_task *task) task->tk_status = -EAGAIN; goto out_unlock; } + if (!bc_prealloc(req) && !req->rq_xmit_bytes_sent) + req->rq_xid = xprt_alloc_xid(xprt); ret = true; out_unlock: spin_unlock_bh(&xprt->transport_lock); @@ -1163,10 +1165,10 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) out_init_req: xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots, xprt->num_reqs); + spin_unlock(&xprt->reserve_lock); + task->tk_status = 0; task->tk_rqstp = req; - xprt_request_init(task, xprt); - spin_unlock(&xprt->reserve_lock); } EXPORT_SYMBOL_GPL(xprt_alloc_slot); @@ -1184,7 +1186,7 @@ void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) } EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot); -static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) +void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { spin_lock(&xprt->reserve_lock); if (!xprt_dynamic_free_slot(xprt, req)) { @@ -1194,6 +1196,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) xprt_wake_up_backlog(xprt); spin_unlock(&xprt->reserve_lock); } +EXPORT_SYMBOL_GPL(xprt_free_slot); static void xprt_free_all_slots(struct rpc_xprt *xprt) { @@ -1303,8 +1306,9 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt) xprt->xid = prandom_u32(); } -static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) +void xprt_request_init(struct rpc_task *task) { + struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; INIT_LIST_HEAD(&req->rq_list); @@ -1312,7 +1316,6 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_task = task; req->rq_xprt = xprt; req->rq_buffer = NULL; - req->rq_xid = xprt_alloc_xid(xprt); req->rq_connect_cookie = xprt->connect_cookie - 1; req->rq_bytes_sent = 0; req->rq_snd_buf.len = 0; @@ -1373,7 +1376,7 @@ void xprt_release(struct rpc_task *task) dprintk("RPC: %5u release request %p\n", task->tk_pid, req); if (likely(!bc_prealloc(req))) - xprt_free_slot(xprt, req); + xprt->ops->free_slot(xprt, req); else xprt_free_bc_request(req); } diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 47ebac949769..c8f1c2b89dad 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -29,29 +29,41 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, spin_unlock(&buf->rb_reqslock); rpcrdma_destroy_req(req); - - kfree(rqst); } -static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, - struct rpc_rqst *rqst) +static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, + unsigned int count) { - struct rpcrdma_regbuf *rb; - struct rpcrdma_req *req; - size_t size; + struct rpc_xprt *xprt = &r_xprt->rx_xprt; + struct rpc_rqst *rqst; + unsigned int i; + + for (i = 0; i < (count << 1); i++) { + struct rpcrdma_regbuf *rb; + struct rpcrdma_req *req; + size_t size; + + req = rpcrdma_create_req(r_xprt); + if (IS_ERR(req)) + return PTR_ERR(req); + rqst = &req->rl_slot; + + rqst->rq_xprt = xprt; + INIT_LIST_HEAD(&rqst->rq_list); + INIT_LIST_HEAD(&rqst->rq_bc_list); + __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); + spin_lock_bh(&xprt->bc_pa_lock); + list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); - req = rpcrdma_create_req(r_xprt); - if (IS_ERR(req)) - return PTR_ERR(req); - - size = r_xprt->rx_data.inline_rsize; - rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); - if (IS_ERR(rb)) - goto out_fail; - req->rl_sendbuf = rb; - xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, - min_t(size_t, size, PAGE_SIZE)); - rpcrdma_set_xprtdata(rqst, req); + size = r_xprt->rx_data.inline_rsize; + rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); + if (IS_ERR(rb)) + goto out_fail; + req->rl_sendbuf = rb; + xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, + min_t(size_t, size, PAGE_SIZE)); + } return 0; out_fail: @@ -59,23 +71,6 @@ out_fail: return -ENOMEM; } -/* Allocate and add receive buffers to the rpcrdma_buffer's - * existing list of rep's. These are released when the - * transport is destroyed. - */ -static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, - unsigned int count) -{ - int rc = 0; - - while (count--) { - rc = rpcrdma_create_rep(r_xprt); - if (rc) - break; - } - return rc; -} - /** * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests * @xprt: transport associated with these backchannel resources @@ -86,9 +81,6 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; - struct rpc_rqst *rqst; - unsigned int i; int rc; /* The backchannel reply path returns each rpc_rqst to the @@ -103,35 +95,11 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) if (reqs > RPCRDMA_BACKWARD_WRS >> 1) goto out_err; - for (i = 0; i < (reqs << 1); i++) { - rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); - if (!rqst) - goto out_free; - - dprintk("RPC: %s: new rqst %p\n", __func__, rqst); - - rqst->rq_xprt = &r_xprt->rx_xprt; - INIT_LIST_HEAD(&rqst->rq_list); - INIT_LIST_HEAD(&rqst->rq_bc_list); - __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); - - if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) - goto out_free; - - spin_lock_bh(&xprt->bc_pa_lock); - list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); - spin_unlock_bh(&xprt->bc_pa_lock); - } - - rc = rpcrdma_bc_setup_reps(r_xprt, reqs); + rc = rpcrdma_bc_setup_reqs(r_xprt, reqs); if (rc) goto out_free; - rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs); - if (rc) - goto out_free; - - buffer->rb_bc_srv_max_requests = reqs; + r_xprt->rx_buf.rb_bc_srv_max_requests = reqs; request_module("svcrdma"); trace_xprtrdma_cb_setup(r_xprt, reqs); return 0; @@ -235,6 +203,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) if (rc < 0) goto failed_marshal; + rpcrdma_post_recvs(r_xprt, true); if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; return 0; @@ -275,10 +244,14 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) */ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) { + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpc_xprt *xprt = rqst->rq_xprt; dprintk("RPC: %s: freeing rqst %p (req %p)\n", - __func__, rqst, rpcr_to_rdmar(rqst)); + __func__, rqst, req); + + rpcrdma_recv_buffer_put(req->rl_reply); + req->rl_reply = NULL; spin_lock_bh(&xprt->bc_pa_lock); list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index f2f63959fddd..58b472666255 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -156,10 +156,32 @@ out_release: fmr_op_release_mr(mr); } +/* On success, sets: + * ep->rep_attr.cap.max_send_wr + * ep->rep_attr.cap.max_recv_wr + * cdata->max_requests + * ia->ri_max_segs + */ static int fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { + int max_qp_wr; + + max_qp_wr = ia->ri_device->attrs.max_qp_wr; + max_qp_wr -= RPCRDMA_BACKWARD_WRS; + max_qp_wr -= 1; + if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) + return -ENOMEM; + if (cdata->max_requests > max_qp_wr) + cdata->max_requests = max_qp_wr; + ep->rep_attr.cap.max_send_wr = cdata->max_requests; + ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; + ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ + ep->rep_attr.cap.max_recv_wr = cdata->max_requests; + ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; + ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ + ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES); return 0; @@ -219,6 +241,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, mr->mr_sg, i, mr->mr_dir); if (!mr->mr_nents) goto out_dmamap_err; + trace_xprtrdma_dma_map(mr); for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) dma_pages[i] = sg_dma_address(&mr->mr_sg[i]); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index c59c5c788db0..d46dc7e6e30a 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -202,12 +202,22 @@ out_release: frwr_op_release_mr(mr); } +/* On success, sets: + * ep->rep_attr.cap.max_send_wr + * ep->rep_attr.cap.max_recv_wr + * cdata->max_requests + * ia->ri_max_segs + * + * And these FRWR-related fields: + * ia->ri_max_frwr_depth + * ia->ri_mrtype + */ static int frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { struct ib_device_attr *attrs = &ia->ri_device->attrs; - int depth, delta; + int max_qp_wr, depth, delta; ia->ri_mrtype = IB_MR_TYPE_MEM_REG; if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) @@ -241,14 +251,26 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, } while (delta > 0); } - ep->rep_attr.cap.max_send_wr *= depth; - if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) { - cdata->max_requests = attrs->max_qp_wr / depth; + max_qp_wr = ia->ri_device->attrs.max_qp_wr; + max_qp_wr -= RPCRDMA_BACKWARD_WRS; + max_qp_wr -= 1; + if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) + return -ENOMEM; + if (cdata->max_requests > max_qp_wr) + cdata->max_requests = max_qp_wr; + ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth; + if (ep->rep_attr.cap.max_send_wr > max_qp_wr) { + cdata->max_requests = max_qp_wr / depth; if (!cdata->max_requests) return -EINVAL; ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth; } + ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; + ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ + ep->rep_attr.cap.max_recv_wr = cdata->max_requests; + ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; + ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / ia->ri_max_frwr_depth); @@ -393,6 +415,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); if (!mr->mr_nents) goto out_dmamap_err; + trace_xprtrdma_dma_map(mr); ibmr = frwr->fr_mr; n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c index a762d192372b..f338065121f2 100644 --- a/net/sunrpc/xprtrdma/module.c +++ b/net/sunrpc/xprtrdma/module.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (c) 2015, 2017 Oracle. All rights reserved. */ diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e8adad33d0bb..1c78516aa6f2 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. @@ -54,14 +55,6 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif -static const char transfertypes[][12] = { - "inline", /* no chunks */ - "read list", /* some argument via rdma read */ - "*read list", /* entire request via rdma read */ - "write list", /* some result via rdma write */ - "reply chunk" /* entire reply via rdma write */ -}; - /* Returns size of largest RPC-over-RDMA header in a Call message * * The largest Call header contains a full-size Read list and a @@ -230,7 +223,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, */ *ppages = alloc_page(GFP_ATOMIC); if (!*ppages) - return -EAGAIN; + return -ENOBUFS; } seg->mr_page = *ppages; seg->mr_offset = (char *)page_base; @@ -365,7 +358,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, false, &mr); if (IS_ERR(seg)) - goto out_maperr; + return PTR_ERR(seg); rpcrdma_mr_push(mr, &req->rl_registered); if (encode_read_segment(xdr, mr, pos) < 0) @@ -377,11 +370,6 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, } while (nsegs); return 0; - -out_maperr: - if (PTR_ERR(seg) == -EAGAIN) - xprt_wait_for_buffer_space(rqst->rq_task, NULL); - return PTR_ERR(seg); } /* Register and XDR encode the Write list. Supports encoding a list @@ -428,7 +416,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, true, &mr); if (IS_ERR(seg)) - goto out_maperr; + return PTR_ERR(seg); rpcrdma_mr_push(mr, &req->rl_registered); if (encode_rdma_segment(xdr, mr) < 0) @@ -445,11 +433,6 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, *segcount = cpu_to_be32(nchunks); return 0; - -out_maperr: - if (PTR_ERR(seg) == -EAGAIN) - xprt_wait_for_buffer_space(rqst->rq_task, NULL); - return PTR_ERR(seg); } /* Register and XDR encode the Reply chunk. Supports encoding an array @@ -491,7 +474,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, true, &mr); if (IS_ERR(seg)) - goto out_maperr; + return PTR_ERR(seg); rpcrdma_mr_push(mr, &req->rl_registered); if (encode_rdma_segment(xdr, mr) < 0) @@ -508,11 +491,6 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, *segcount = cpu_to_be32(nchunks); return 0; - -out_maperr: - if (PTR_ERR(seg) == -EAGAIN) - xprt_wait_for_buffer_space(rqst->rq_task, NULL); - return PTR_ERR(seg); } /** @@ -709,7 +687,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, { req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf); if (!req->rl_sendctx) - return -ENOBUFS; + return -EAGAIN; req->rl_sendctx->sc_wr.num_sge = 0; req->rl_sendctx->sc_unmap_count = 0; req->rl_sendctx->sc_req = req; @@ -883,7 +861,15 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) return 0; out_err: - r_xprt->rx_stats.failed_marshal_count++; + switch (ret) { + case -EAGAIN: + xprt_wait_for_buffer_space(rqst->rq_task, NULL); + break; + case -ENOBUFS: + break; + default: + r_xprt->rx_stats.failed_marshal_count++; + } return ret; } @@ -1026,8 +1012,6 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) out_short: pr_warn("RPC/RDMA short backward direction call\n"); - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) - xprt_disconnect_done(&r_xprt->rx_xprt); return true; } #else /* CONFIG_SUNRPC_BACKCHANNEL */ @@ -1333,13 +1317,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) u32 credits; __be32 *p; + --buf->rb_posted_receives; + if (rep->rr_hdrbuf.head[0].iov_len == 0) goto out_badstatus; + /* Fixed transport header fields */ xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, rep->rr_hdrbuf.head[0].iov_base); - - /* Fixed transport header fields */ p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p)); if (unlikely(!p)) goto out_shortreply; @@ -1378,17 +1363,10 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); + rpcrdma_post_recvs(r_xprt, false); queue_work(rpcrdma_receive_wq, &rep->rr_work); return; -out_badstatus: - rpcrdma_recv_buffer_put(rep); - if (r_xprt->rx_ep.rep_connected == 1) { - r_xprt->rx_ep.rep_connected = -EIO; - rpcrdma_conn_func(&r_xprt->rx_ep); - } - return; - out_badversion: trace_xprtrdma_reply_vers(rep); goto repost; @@ -1408,7 +1386,7 @@ out_shortreply: * receive buffer before returning. */ repost: - r_xprt->rx_stats.bad_reply_count++; - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) - rpcrdma_recv_buffer_put(rep); + rpcrdma_post_recvs(r_xprt, false); +out_badstatus: + rpcrdma_recv_buffer_put(rep); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index a73632ca9048..1035516d54e2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -273,6 +273,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = { .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, .alloc_slot = xprt_alloc_slot, + .free_slot = xprt_free_slot, .release_request = xprt_release_rqst_cong, .buf_alloc = xprt_rdma_bc_allocate, .buf_free = xprt_rdma_bc_free, diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index cc1aad325496..0c775f05123c 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. @@ -330,9 +331,7 @@ xprt_setup_rdma(struct xprt_create *args) return ERR_PTR(-EBADF); } - xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), - xprt_rdma_slot_table_entries, - xprt_rdma_slot_table_entries); + xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); if (xprt == NULL) { dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", __func__); @@ -364,7 +363,7 @@ xprt_setup_rdma(struct xprt_create *args) xprt_set_bound(xprt); xprt_rdma_format_addresses(xprt, sap); - cdata.max_requests = xprt->max_reqs; + cdata.max_requests = xprt_rdma_slot_table_entries; cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ @@ -537,6 +536,47 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) } } +/** + * xprt_rdma_alloc_slot - allocate an rpc_rqst + * @xprt: controlling RPC transport + * @task: RPC task requesting a fresh rpc_rqst + * + * tk_status values: + * %0 if task->tk_rqstp points to a fresh rpc_rqst + * %-EAGAIN if no rpc_rqst is available; queued on backlog + */ +static void +xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_req *req; + + req = rpcrdma_buffer_get(&r_xprt->rx_buf); + if (!req) + goto out_sleep; + task->tk_rqstp = &req->rl_slot; + task->tk_status = 0; + return; + +out_sleep: + rpc_sleep_on(&xprt->backlog, task, NULL); + task->tk_status = -EAGAIN; +} + +/** + * xprt_rdma_free_slot - release an rpc_rqst + * @xprt: controlling RPC transport + * @rqst: rpc_rqst to release + * + */ +static void +xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) +{ + memset(rqst, 0, sizeof(*rqst)); + rpcrdma_buffer_put(rpcr_to_rdmar(rqst)); + rpc_wake_up_next(&xprt->backlog); +} + static bool rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, size_t size, gfp_t flags) @@ -607,13 +647,9 @@ xprt_rdma_allocate(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); - struct rpcrdma_req *req; + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); gfp_t flags; - req = rpcrdma_buffer_get(&r_xprt->rx_buf); - if (req == NULL) - goto out_get; - flags = RPCRDMA_DEF_GFP; if (RPC_IS_SWAPPER(task)) flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; @@ -623,15 +659,12 @@ xprt_rdma_allocate(struct rpc_task *task) if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) goto out_fail; - rpcrdma_set_xprtdata(rqst, req); rqst->rq_buffer = req->rl_sendbuf->rg_base; rqst->rq_rbuffer = req->rl_recvbuf->rg_base; trace_xprtrdma_allocate(task, req); return 0; out_fail: - rpcrdma_buffer_put(req); -out_get: trace_xprtrdma_allocate(task, NULL); return -ENOMEM; } @@ -652,7 +685,6 @@ xprt_rdma_free(struct rpc_task *task) if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) rpcrdma_release_rqst(r_xprt, req); trace_xprtrdma_rpc_done(task, req); - rpcrdma_buffer_put(req); } /** @@ -690,9 +722,6 @@ xprt_rdma_send_request(struct rpc_task *task) if (rc < 0) goto failed_marshal; - if (req->rl_reply == NULL) /* e.g. reconnection */ - rpcrdma_recv_buffer_get(req); - /* Must suppress retransmit to maintain credits */ if (rqst->rq_connect_cookie == xprt->connect_cookie) goto drop_connection; @@ -779,7 +808,8 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt) static const struct rpc_xprt_ops xprt_rdma_procs = { .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ - .alloc_slot = xprt_alloc_slot, + .alloc_slot = xprt_rdma_alloc_slot, + .free_slot = xp |
