aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Trond Myklebust <trond.myklebust@hammerspace.com> 2018-06-04 18:57:13 -0400
committer Trond Myklebust <trond.myklebust@hammerspace.com> 2018-06-04 18:57:13 -0400
commit fcda3d5d221bbfc469415b0fa7dc4eb87d90d955 (patch)
tree a428ad27d04527a1a17973262d00b35f037eecc3
parent 3f0b3cf46e0542ac4b4241c579b944b755d11b67 (diff)
parent 11d0ac16b02eab8cda32efcb51bfab452dab760b (diff)
Merge tag 'nfs-rdma-for-4.18-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
NFS-over-RDMA client updates for Linux 4.18

Stable patches:
- xprtrdma: Return -ENOBUFS when no pages are available

New features:
- Add ->alloc_slot() and ->free_slot() functions

Bugfixes and cleanups:
- Add missing SPDX tags to some files
- Try to fail mount quickly if client has no RDMA devices
- Create transport IDs in the correct network namespace
- Fix max_send_wr computation
- Clean up receive tracepoints
- Refactor receive handling
- Remove unused functions
-rw-r--r-- include/linux/sunrpc/rpc_rdma.h | 1
-rw-r--r-- include/linux/sunrpc/xprt.h | 6
-rw-r--r-- include/linux/sunrpc/xprtrdma.h | 1
-rw-r--r-- include/trace/events/rpcrdma.h | 76
-rw-r--r-- net/sunrpc/clnt.c | 1
-rw-r--r-- net/sunrpc/xprt.c | 17
-rw-r--r-- net/sunrpc/xprtrdma/backchannel.c | 105
-rw-r--r-- net/sunrpc/xprtrdma/fmr_ops.c | 23
-rw-r--r-- net/sunrpc/xprtrdma/frwr_ops.c | 31
-rw-r--r-- net/sunrpc/xprtrdma/module.c | 1
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c | 66
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 1
-rw-r--r-- net/sunrpc/xprtrdma/transport.c | 64
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c | 291
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h | 26
-rw-r--r-- net/sunrpc/xprtsock.c | 4
16 files changed, 359 insertions, 355 deletions
diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h
index 8f144db73e38..92d182fd8e3b 100644
--- a/include/linux/sunrpc/rpc_rdma.h
+++ b/include/linux/sunrpc/rpc_rdma.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright (c) 2015-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 5fea0fb420df..336fd1a19cca 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -84,7 +84,6 @@ struct rpc_rqst {
void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
struct list_head rq_list;
- void *rq_xprtdata; /* Per-xprt private data */
void *rq_buffer; /* Call XDR encode buffer */
size_t rq_callsize;
void *rq_rbuffer; /* Reply XDR decode buffer */
@@ -127,6 +126,8 @@ struct rpc_xprt_ops {
int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task);
+ void (*free_slot)(struct rpc_xprt *xprt,
+ struct rpc_rqst *req);
void (*rpcbind)(struct rpc_task *task);
void (*set_port)(struct rpc_xprt *xprt, unsigned short port);
void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task);
@@ -324,10 +325,13 @@ struct xprt_class {
struct rpc_xprt *xprt_create_transport(struct xprt_create *args);
void xprt_connect(struct rpc_task *task);
void xprt_reserve(struct rpc_task *task);
+void xprt_request_init(struct rpc_task *task);
void xprt_retry_reserve(struct rpc_task *task);
int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
+void xprt_free_slot(struct rpc_xprt *xprt,
+ struct rpc_rqst *req);
void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
bool xprt_prepare_transmit(struct rpc_task *task);
void xprt_transmit(struct rpc_task *task);
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 5859563e3c1f..86fc38ff0355 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 50ed3f8bf534..c4494a2b3ecd 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -528,24 +528,54 @@ TRACE_EVENT(xprtrdma_post_send,
TRACE_EVENT(xprtrdma_post_recv,
TP_PROTO(
- const struct rpcrdma_rep *rep,
+ const struct ib_cqe *cqe
+ ),
+
+ TP_ARGS(cqe),
+
+ TP_STRUCT__entry(
+ __field(const void *, cqe)
+ ),
+
+ TP_fast_assign(
+ __entry->cqe = cqe;
+ ),
+
+ TP_printk("cqe=%p",
+ __entry->cqe
+ )
+);
+
+TRACE_EVENT(xprtrdma_post_recvs,
+ TP_PROTO(
+ const struct rpcrdma_xprt *r_xprt,
+ unsigned int count,
int status
),
- TP_ARGS(rep, status),
+ TP_ARGS(r_xprt, count, status),
TP_STRUCT__entry(
- __field(const void *, rep)
+ __field(const void *, r_xprt)
+ __field(unsigned int, count)
__field(int, status)
+ __field(int, posted)
+ __string(addr, rpcrdma_addrstr(r_xprt))
+ __string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->rep = rep;
+ __entry->r_xprt = r_xprt;
+ __entry->count = count;
__entry->status = status;
+ __entry->posted = r_xprt->rx_buf.rb_posted_receives;
+ __assign_str(addr, rpcrdma_addrstr(r_xprt));
+ __assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("rep=%p status=%d",
- __entry->rep, __entry->status
+ TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)",
+ __get_str(addr), __get_str(port), __entry->r_xprt,
+ __entry->count, __entry->posted, __entry->status
)
);
@@ -584,28 +614,32 @@ TRACE_EVENT(xprtrdma_wc_send,
TRACE_EVENT(xprtrdma_wc_receive,
TP_PROTO(
- const struct rpcrdma_rep *rep,
const struct ib_wc *wc
),
- TP_ARGS(rep, wc),
+ TP_ARGS(wc),
TP_STRUCT__entry(
- __field(const void *, rep)
- __field(unsigned int, byte_len)
+ __field(const void *, cqe)
+ __field(u32, byte_len)
__field(unsigned int, status)
- __field(unsigned int, vendor_err)
+ __field(u32, vendor_err)
),
TP_fast_assign(
- __entry->rep = rep;
- __entry->byte_len = wc->byte_len;
+ __entry->cqe = wc->wr_cqe;
__entry->status = wc->status;
- __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
+ if (wc->status) {
+ __entry->byte_len = 0;
+ __entry->vendor_err = wc->vendor_err;
+ } else {
+ __entry->byte_len = wc->byte_len;
+ __entry->vendor_err = 0;
+ }
),
- TP_printk("rep=%p, %u bytes: %s (%u/0x%x)",
- __entry->rep, __entry->byte_len,
+ TP_printk("cqe=%p %u bytes: %s (%u/0x%x)",
+ __entry->cqe, __entry->byte_len,
rdma_show_wc_status(__entry->status),
__entry->status, __entry->vendor_err
)
@@ -616,6 +650,7 @@ DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li);
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
DEFINE_MR_EVENT(xprtrdma_localinv);
+DEFINE_MR_EVENT(xprtrdma_dma_map);
DEFINE_MR_EVENT(xprtrdma_dma_unmap);
DEFINE_MR_EVENT(xprtrdma_remoteinv);
DEFINE_MR_EVENT(xprtrdma_recover_mr);
@@ -799,7 +834,6 @@ TRACE_EVENT(xprtrdma_allocate,
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, req)
- __field(const void *, rep)
__field(size_t, callsize)
__field(size_t, rcvsize)
),
@@ -808,15 +842,13 @@ TRACE_EVENT(xprtrdma_allocate,
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->req = req;
- __entry->rep = req ? req->rl_reply : NULL;
__entry->callsize = task->tk_rqstp->rq_callsize;
__entry->rcvsize = task->tk_rqstp->rq_rcvsize;
),
- TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)",
+ TP_printk("task:%u@%u req=%p (%zu, %zu)",
__entry->task_id, __entry->client_id,
- __entry->req, __entry->rep,
- __entry->callsize, __entry->rcvsize
+ __entry->req, __entry->callsize, __entry->rcvsize
)
);
@@ -848,8 +880,6 @@ TRACE_EVENT(xprtrdma_rpc_done,
)
);
-DEFINE_RXPRT_EVENT(xprtrdma_noreps);
-
/**
** Callback events
**/
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c2266f387213..d839c33ae7d9 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1546,6 +1546,7 @@ call_reserveresult(struct rpc_task *task)
task->tk_status = 0;
if (status >= 0) {
if (task->tk_rqstp) {
+ xprt_request_init(task);
task->tk_action = call_refresh;
return;
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 70f005044f06..3c85af058227 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -66,7 +66,7 @@
* Local functions
*/
static void xprt_init(struct rpc_xprt *xprt, struct net *net);
-static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
+static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
static void xprt_connect_status(struct rpc_task *task);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
@@ -987,6 +987,8 @@ bool xprt_prepare_transmit(struct rpc_task *task)
task->tk_status = -EAGAIN;
goto out_unlock;
}
+ if (!bc_prealloc(req) && !req->rq_xmit_bytes_sent)
+ req->rq_xid = xprt_alloc_xid(xprt);
ret = true;
out_unlock:
spin_unlock_bh(&xprt->transport_lock);
@@ -1163,10 +1165,10 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
out_init_req:
xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots,
xprt->num_reqs);
+ spin_unlock(&xprt->reserve_lock);
+
task->tk_status = 0;
task->tk_rqstp = req;
- xprt_request_init(task, xprt);
- spin_unlock(&xprt->reserve_lock);
}
EXPORT_SYMBOL_GPL(xprt_alloc_slot);
@@ -1184,7 +1186,7 @@ void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
}
EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
-static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
+void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
spin_lock(&xprt->reserve_lock);
if (!xprt_dynamic_free_slot(xprt, req)) {
@@ -1194,6 +1196,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
xprt_wake_up_backlog(xprt);
spin_unlock(&xprt->reserve_lock);
}
+EXPORT_SYMBOL_GPL(xprt_free_slot);
static void xprt_free_all_slots(struct rpc_xprt *xprt)
{
@@ -1303,8 +1306,9 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt)
xprt->xid = prandom_u32();
}
-static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
+void xprt_request_init(struct rpc_task *task)
{
+ struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req = task->tk_rqstp;
INIT_LIST_HEAD(&req->rq_list);
@@ -1312,7 +1316,6 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
req->rq_task = task;
req->rq_xprt = xprt;
req->rq_buffer = NULL;
- req->rq_xid = xprt_alloc_xid(xprt);
req->rq_connect_cookie = xprt->connect_cookie - 1;
req->rq_bytes_sent = 0;
req->rq_snd_buf.len = 0;
@@ -1373,7 +1376,7 @@ void xprt_release(struct rpc_task *task)
dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
if (likely(!bc_prealloc(req)))
- xprt_free_slot(xprt, req);
+ xprt->ops->free_slot(xprt, req);
else
xprt_free_bc_request(req);
}
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 47ebac949769..c8f1c2b89dad 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -29,29 +29,41 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
spin_unlock(&buf->rb_reqslock);
rpcrdma_destroy_req(req);
-
- kfree(rqst);
}
-static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
- struct rpc_rqst *rqst)
+static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
+ unsigned int count)
{
- struct rpcrdma_regbuf *rb;
- struct rpcrdma_req *req;
- size_t size;
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ struct rpc_rqst *rqst;
+ unsigned int i;
+
+ for (i = 0; i < (count << 1); i++) {
+ struct rpcrdma_regbuf *rb;
+ struct rpcrdma_req *req;
+ size_t size;
+
+ req = rpcrdma_create_req(r_xprt);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ rqst = &req->rl_slot;
+
+ rqst->rq_xprt = xprt;
+ INIT_LIST_HEAD(&rqst->rq_list);
+ INIT_LIST_HEAD(&rqst->rq_bc_list);
+ __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
+ spin_lock_bh(&xprt->bc_pa_lock);
+ list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
+ spin_unlock_bh(&xprt->bc_pa_lock);
- req = rpcrdma_create_req(r_xprt);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- size = r_xprt->rx_data.inline_rsize;
- rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
- if (IS_ERR(rb))
- goto out_fail;
- req->rl_sendbuf = rb;
- xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
- min_t(size_t, size, PAGE_SIZE));
- rpcrdma_set_xprtdata(rqst, req);
+ size = r_xprt->rx_data.inline_rsize;
+ rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
+ if (IS_ERR(rb))
+ goto out_fail;
+ req->rl_sendbuf = rb;
+ xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
+ min_t(size_t, size, PAGE_SIZE));
+ }
return 0;
out_fail:
@@ -59,23 +71,6 @@ out_fail:
return -ENOMEM;
}
-/* Allocate and add receive buffers to the rpcrdma_buffer's
- * existing list of rep's. These are released when the
- * transport is destroyed.
- */
-static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
- unsigned int count)
-{
- int rc = 0;
-
- while (count--) {
- rc = rpcrdma_create_rep(r_xprt);
- if (rc)
- break;
- }
- return rc;
-}
-
/**
* xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
* @xprt: transport associated with these backchannel resources
@@ -86,9 +81,6 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
- struct rpc_rqst *rqst;
- unsigned int i;
int rc;
/* The backchannel reply path returns each rpc_rqst to the
@@ -103,35 +95,11 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
goto out_err;
- for (i = 0; i < (reqs << 1); i++) {
- rqst = kzalloc(sizeof(*rqst), GFP_KERNEL);
- if (!rqst)
- goto out_free;
-
- dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
-
- rqst->rq_xprt = &r_xprt->rx_xprt;
- INIT_LIST_HEAD(&rqst->rq_list);
- INIT_LIST_HEAD(&rqst->rq_bc_list);
- __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
-
- if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
- goto out_free;
-
- spin_lock_bh(&xprt->bc_pa_lock);
- list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
- spin_unlock_bh(&xprt->bc_pa_lock);
- }
-
- rc = rpcrdma_bc_setup_reps(r_xprt, reqs);
+ rc = rpcrdma_bc_setup_reqs(r_xprt, reqs);
if (rc)
goto out_free;
- rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs);
- if (rc)
- goto out_free;
-
- buffer->rb_bc_srv_max_requests = reqs;
+ r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
request_module("svcrdma");
trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0;
@@ -235,6 +203,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
if (rc < 0)
goto failed_marshal;
+ rpcrdma_post_recvs(r_xprt, true);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
return 0;
@@ -275,10 +244,14 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
*/
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpc_xprt *xprt = rqst->rq_xprt;
dprintk("RPC: %s: freeing rqst %p (req %p)\n",
- __func__, rqst, rpcr_to_rdmar(rqst));
+ __func__, rqst, req);
+
+ rpcrdma_recv_buffer_put(req->rl_reply);
+ req->rl_reply = NULL;
spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index f2f63959fddd..58b472666255 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -156,10 +156,32 @@ out_release:
fmr_op_release_mr(mr);
}
+/* On success, sets:
+ * ep->rep_attr.cap.max_send_wr
+ * ep->rep_attr.cap.max_recv_wr
+ * cdata->max_requests
+ * ia->ri_max_segs
+ */
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
+ int max_qp_wr;
+
+ max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+ max_qp_wr -= RPCRDMA_BACKWARD_WRS;
+ max_qp_wr -= 1;
+ if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
+ return -ENOMEM;
+ if (cdata->max_requests > max_qp_wr)
+ cdata->max_requests = max_qp_wr;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
+ ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
+ ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+ ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
+
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
RPCRDMA_MAX_FMR_SGES);
return 0;
@@ -219,6 +241,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
+ trace_xprtrdma_dma_map(mr);
for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c59c5c788db0..d46dc7e6e30a 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -202,12 +202,22 @@ out_release:
frwr_op_release_mr(mr);
}
+/* On success, sets:
+ * ep->rep_attr.cap.max_send_wr
+ * ep->rep_attr.cap.max_recv_wr
+ * cdata->max_requests
+ * ia->ri_max_segs
+ *
+ * And these FRWR-related fields:
+ * ia->ri_max_frwr_depth
+ * ia->ri_mrtype
+ */
static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
struct ib_device_attr *attrs = &ia->ri_device->attrs;
- int depth, delta;
+ int max_qp_wr, depth, delta;
ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
@@ -241,14 +251,26 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
} while (delta > 0);
}
- ep->rep_attr.cap.max_send_wr *= depth;
- if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) {
- cdata->max_requests = attrs->max_qp_wr / depth;
+ max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+ max_qp_wr -= RPCRDMA_BACKWARD_WRS;
+ max_qp_wr -= 1;
+ if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
+ return -ENOMEM;
+ if (cdata->max_requests > max_qp_wr)
+ cdata->max_requests = max_qp_wr;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
+ if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
+ cdata->max_requests = max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests *
depth;
}
+ ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
+ ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
+ ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+ ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
ia->ri_max_frwr_depth);
@@ -393,6 +415,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
+ trace_xprtrdma_dma_map(mr);
ibmr = frwr->fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index a762d192372b..f338065121f2 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2015, 2017 Oracle. All rights reserved.
*/
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e8adad33d0bb..1c78516aa6f2 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -54,14 +55,6 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-static const char transfertypes[][12] = {
- "inline", /* no chunks */
- "read list", /* some argument via rdma read */
- "*read list", /* entire request via rdma read */
- "write list", /* some result via rdma write */
- "reply chunk" /* entire reply via rdma write */
-};
-
/* Returns size of largest RPC-over-RDMA header in a Call message
*
* The largest Call header contains a full-size Read list and a
@@ -230,7 +223,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
*/
*ppages = alloc_page(GFP_ATOMIC);
if (!*ppages)
- return -EAGAIN;
+ return -ENOBUFS;
}
seg->mr_page = *ppages;
seg->mr_offset = (char *)page_base;
@@ -365,7 +358,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
false, &mr);
if (IS_ERR(seg))
- goto out_maperr;
+ return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_read_segment(xdr, mr, pos) < 0)
@@ -377,11 +370,6 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
} while (nsegs);
return 0;
-
-out_maperr:
- if (PTR_ERR(seg) == -EAGAIN)
- xprt_wait_for_buffer_space(rqst->rq_task, NULL);
- return PTR_ERR(seg);
}
/* Register and XDR encode the Write list. Supports encoding a list
@@ -428,7 +416,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr);
if (IS_ERR(seg))
- goto out_maperr;
+ return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0)
@@ -445,11 +433,6 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks);
return 0;
-
-out_maperr:
- if (PTR_ERR(seg) == -EAGAIN)
- xprt_wait_for_buffer_space(rqst->rq_task, NULL);
- return PTR_ERR(seg);
}
/* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -491,7 +474,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr);
if (IS_ERR(seg))
- goto out_maperr;
+ return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0)
@@ -508,11 +491,6 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks);
return 0;
-
-out_maperr:
- if (PTR_ERR(seg) == -EAGAIN)
- xprt_wait_for_buffer_space(rqst->rq_task, NULL);
- return PTR_ERR(seg);
}
/**
@@ -709,7 +687,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
{
req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
if (!req->rl_sendctx)
- return -ENOBUFS;
+ return -EAGAIN;
req->rl_sendctx->sc_wr.num_sge = 0;
req->rl_sendctx->sc_unmap_count = 0;
req->rl_sendctx->sc_req = req;
@@ -883,7 +861,15 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
return 0;
out_err:
- r_xprt->rx_stats.failed_marshal_count++;
+ switch (ret) {
+ case -EAGAIN:
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ break;
+ case -ENOBUFS:
+ break;
+ default:
+ r_xprt->rx_stats.failed_marshal_count++;
+ }
return ret;
}
@@ -1026,8 +1012,6 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
out_short:
pr_warn("RPC/RDMA short backward direction call\n");
- if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
- xprt_disconnect_done(&r_xprt->rx_xprt);
return true;
}
#else /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -1333,13 +1317,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits;
__be32 *p;
+ --buf->rb_posted_receives;
+
if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;
+ /* Fixed transport header fields */
xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
rep->rr_hdrbuf.head[0].iov_base);
-
- /* Fixed transport header fields */
p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
if (unlikely(!p))
goto out_shortreply;
@@ -1378,17 +1363,10 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
+ rpcrdma_post_recvs(r_xprt, false);
queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;
-out_badstatus:
- rpcrdma_recv_buffer_put(rep);
- if (r_xprt->rx_ep.rep_connected == 1) {
- r_xprt->rx_ep.rep_connected = -EIO;
- rpcrdma_conn_func(&r_xprt->rx_ep);
- }
- return;
-
out_badversion:
trace_xprtrdma_reply_vers(rep);
goto repost;
@@ -1408,7 +1386,7 @@ out_shortreply:
* receive buffer before returning.
*/
repost:
- r_xprt->rx_stats.bad_reply_count++;
- if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_post_recvs(r_xprt, false);
+out_badstatus:
+ rpcrdma_recv_buffer_put(rep);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index a73632ca9048..1035516d54e2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -273,6 +273,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
.alloc_slot = xprt_alloc_slot,
+ .free_slot = xprt_free_slot,
.release_request = xprt_release_rqst_cong,
.buf_alloc = xprt_rdma_bc_allocate,
.buf_free = xprt_rdma_bc_free,
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index cc1aad325496..0c775f05123c 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -330,9 +331,7 @@ xprt_setup_rdma(struct xprt_create *args)
return ERR_PTR(-EBADF);
}
- xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
- xprt_rdma_slot_table_entries,
- xprt_rdma_slot_table_entries);
+ xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
if (xprt == NULL) {
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
__func__);
@@ -364,7 +363,7 @@ xprt_setup_rdma(struct xprt_create *args)
xprt_set_bound(xprt);
xprt_rdma_format_addresses(xprt, sap);
- cdata.max_requests = xprt->max_reqs;
+ cdata.max_requests = xprt_rdma_slot_table_entries;
cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
@@ -537,6 +536,47 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
}
}
+/**
+ * xprt_rdma_alloc_slot - allocate an rpc_rqst
+ * @xprt: controlling RPC transport
+ * @task: RPC task requesting a fresh rpc_rqst
+ *
+ * tk_status values:
+ * %0 if task->tk_rqstp points to a fresh rpc_rqst
+ * %-EAGAIN if no rpc_rqst is available; queued on backlog
+ */
+static void
+xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_req *req;
+
+ req = rpcrdma_buffer_get(&r_xprt->rx_buf);
+ if (!req)
+ goto out_sleep;
+ task->tk_rqstp = &req->rl_slot;
+ task->tk_status = 0;
+ return;
+
+out_sleep:
+ rpc_sleep_on(&xprt->backlog, task, NULL);
+ task->tk_status = -EAGAIN;
+}
+
+/**
+ * xprt_rdma_free_slot - release an rpc_rqst
+ * @xprt: controlling RPC transport
+ * @rqst: rpc_rqst to release
+ *
+ */
+static void
+xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
+{
+ memset(rqst, 0, sizeof(*rqst));
+ rpcrdma_buffer_put(rpcr_to_rdmar(rqst));
+ rpc_wake_up_next(&xprt->backlog);
+}
+
static bool
rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
size_t size, gfp_t flags)
@@ -607,13 +647,9 @@ xprt_rdma_allocate(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
- struct rpcrdma_req *req;
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
gfp_t flags;
- req = rpcrdma_buffer_get(&r_xprt->rx_buf);
- if (req == NULL)
- goto out_get;
-
flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@@ -623,15 +659,12 @@ xprt_rdma_allocate(struct rpc_task *task)
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail;
- rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
trace_xprtrdma_allocate(task, req);
return 0;
out_fail:
- rpcrdma_buffer_put(req);
-out_get:
trace_xprtrdma_allocate(task, NULL);
return -ENOMEM;
}
@@ -652,7 +685,6 @@ xprt_rdma_free(struct rpc_task *task)
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
rpcrdma_release_rqst(r_xprt, req);
trace_xprtrdma_rpc_done(task, req);
- rpcrdma_buffer_put(req);
}
/**
@@ -690,9 +722,6 @@ xprt_rdma_send_request(struct rpc_task *task)
if (rc < 0)
goto failed_marshal;
- if (req->rl_reply == NULL) /* e.g. reconnection */
- rpcrdma_recv_buffer_get(req);
-
/* Must suppress retransmit to maintain credits */
if (rqst->rq_connect_cookie == xprt->connect_cookie)
goto drop_connection;
@@ -779,7 +808,8 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt)
static const struct rpc_xprt_ops xprt_rdma_procs = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
- .alloc_slot = xprt_alloc_slot,
+ .alloc_slot = xprt_rdma_alloc_slot,
+ .free_slot = xp