From f8181697fd09bb3b6d857cef31d9af93ea2b0758 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 1 Jul 2016 16:00:49 -0700 Subject: IB/hfi1: Clean up port state structure definition The definition of port state changed mid development and the old structure was kept accidentally. Remove this dead code. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/opa_port_info.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index 2b95c2c336eb..9303e0e4f508 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -33,11 +33,6 @@ #if !defined(OPA_PORT_INFO_H) #define OPA_PORT_INFO_H -/* Temporary until HFI driver is updated */ -#ifndef USE_PI_LED_ENABLE -#define USE_PI_LED_ENABLE 0 -#endif - #define OPA_PORT_LINK_MODE_NOP 0 /* No change */ #define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */ @@ -274,23 +269,12 @@ enum port_info_field_masks { OPA_PI_MASK_MTU_CAP = 0x0F, }; -#if USE_PI_LED_ENABLE struct opa_port_states { u8 reserved; u8 ledenable_offlinereason; /* 1 res, 1 bit, 6 bits */ u8 reserved2; u8 portphysstate_portstate; /* 4 bits, 4 bits */ }; -#define PI_LED_ENABLE_SUP 1 -#else -struct opa_port_states { - u8 reserved; - u8 offline_reason; /* 2 res, 6 bits */ - u8 reserved2; - u8 portphysstate_portstate; /* 4 bits, 4 bits */ -}; -#define PI_LED_ENABLE_SUP 0 -#endif struct opa_port_state_info { struct opa_port_states port_states; -- cgit v1.2.3 From 632bc3f65081dd1e2e5394a9161580a0f78e8839 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 21 Jul 2016 13:03:30 -0700 Subject: IB/core, RDMA RW API: Do not exceed QP SGE send limit Compute the SGE limit for RDMA READ and WRITE requests in ib_create_qp(). Use that limit in the RDMA RW API implementation. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: Steve Wise Cc: Parav Pandit Cc: Nicholas Bellinger Cc: Laurence Oberman Cc: #v4.7+ Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7e440d41487a..e694f02d42e3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1428,6 +1428,10 @@ struct ib_srq { } ext; }; +/* + * @max_write_sge: Maximum SGE elements per RDMA WRITE request. + * @max_read_sge: Maximum SGE elements per RDMA READ request. + */ struct ib_qp { struct ib_device *device; struct ib_pd *pd; @@ -1449,6 +1453,8 @@ struct ib_qp { void (*event_handler)(struct ib_event *, void *); void *qp_context; u32 qp_num; + u32 max_write_sge; + u32 max_read_sge; enum ib_qp_type qp_type; }; -- cgit v1.2.3 From afcf8f7647780aa147ad68be48d223cd50311b9a Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 1 Jul 2016 16:02:07 -0700 Subject: IB/rdmavt: Add data structures and routines for table driven post send Add flexibility for driver dependent operations in post send because different drivers will have differing post send operation support. This includes data structure definitions to support a table driven scheme along with the necessary validation routine using the new table. Reviewed-by: Ashutosh Dixit Reviewed-by: Jianxin Xiong Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 +++ include/rdma/rdmavt_qp.h | 28 ++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9c9a27d42aaa..3a70dc047314 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -351,6 +351,9 @@ struct rvt_dev_info { /* Driver specific properties */ struct rvt_driver_params dparms; + /* post send table */ + const struct rvt_operation_params *post_parms; + struct rvt_mregion __rcu *dma_mr; struct rvt_lkey_table lkey_table; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 6d23b879416a..a90d1e941504 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -228,11 +228,31 @@ struct rvt_ack_entry { #define RC_QP_SCALING_INTERVAL 5 -/* - * Variables prefixed with s_ are for the requester (sender). - * Variables prefixed with r_ are for the responder (receiver). - * Variables prefixed with ack_ are for responder replies. +#define RVT_OPERATION_PRIV 0x00000001 +#define RVT_OPERATION_ATOMIC 0x00000002 +#define RVT_OPERATION_ATOMIC_SGE 0x00000004 + +#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1) + +/** + * rvt_operation_params - op table entry + * @length - the length to copy into the swqe entry + * @qpt_support - a bit mask indicating QP type support + * @flags - RVT_OPERATION flags (see above) + * + * This supports table driven post send so that + * the driver can have differing an potentially + * different sets of operations. * + **/ + +struct rvt_operation_params { + size_t length; + u32 qpt_support; + u32 flags; +}; + +/* * Common variables are protected by both r_rq.lock and s_lock in that order * which only happens in modify_qp() or changing the QP 'state'. */ -- cgit v1.2.3 From e8f8b098a44a66d3da81e460aed465f26693e120 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Mon, 25 Jul 2016 13:38:19 -0700 Subject: IB/rdmavt: Add mechanism to invalidate MR keys In order to support extended memory management, add the mechanism to invalidate MR keys. This includes a flag "lkey_invalid" in the MR data structure that is to be checked when validating access to the MR via the associated key, and two utility functions to perform fast memory registration and memory key invalidate operations. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Jianxin Xiong Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 +++ include/rdma/rdmavt_mr.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3a70dc047314..7fdba92d4c05 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -487,6 +487,9 @@ void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, int port_index, u16 *pkey_table); +int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, + int access); +int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index 5edffdca8c53..6b3c6c8b6b77 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -81,6 +81,7 @@ struct rvt_mregion { u32 mapsz; /* size of the map array */ u8 page_shift; /* 0 - non unform/non powerof2 sizes */ u8 lkey_published; /* in global table */ + atomic_t lkey_invalid; /* true if current lkey is invalid */ struct completion comp; /* complete when refcount goes to zero */ atomic_t refcount; struct rvt_segarray *map[0]; /* the segments */ -- cgit v1.2.3 From d9f8723924d5955979d05cb7f4f10d9ebac39b7d Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Mon, 25 Jul 2016 13:38:25 -0700 Subject: IB/rdmavt: Handle local operations in post send Some work requests are local operations, such as IB_WR_REG_MR and IB_WR_LOCAL_INV. They differ from non-local operations in that: (1) Local operations can be processed immediately without being posted to the send queue if neither fencing nor completion generation is needed. However, to ensure correct ordering, once a local operation is posted to the work queue due to fencing or completion requiement, all subsequent local operations must also be posted to the work queue until all the local operations on the work queue have completed. (2) Local operations don't send packets over the wire and thus don't need (and shouldn't update) the packet sequence numbers. Define a new a flag bit for the post send table to identify local operations. Add a new field to the QP structure to track the number of local operations on the send queue to determine if direct processing of new local operations should be enabled/disabled. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Jianxin Xiong Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index a90d1e941504..b0ab12b30f1e 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -231,6 +231,7 @@ struct rvt_ack_entry { #define RVT_OPERATION_PRIV 0x00000001 #define RVT_OPERATION_ATOMIC 0x00000002 #define RVT_OPERATION_ATOMIC_SGE 0x00000004 +#define RVT_OPERATION_LOCAL 0x00000008 #define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1) @@ -363,6 +364,8 @@ struct rvt_qp { struct rvt_sge_state s_ack_rdma_sge; struct timer_list s_timer; + atomic_t local_ops_pending; /* number of fast_reg/local_inv reqs */ + /* * This sge list MUST be last. Do not add anything below here. */ -- cgit v1.2.3 From 856cc4c237add46510c8ae91764f4eda31a9e1cf Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 25 Jul 2016 13:39:39 -0700 Subject: IB/hfi1: Add the capability for reserved operations This fix allows for support of in-kernel reserved operations without impacting the ULP user. The low level driver can register a non-zero value which will be transparently added to the send queue size and hidden from the ULP in every respect. ULP post sends will never see a full queue due to a reserved post send and reserved operations will never exceed that registered value. The s_avail will continue to track the ULP swqe availability and the difference between the reserved value and the reserved in use will track reserved availabity. Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 1 + include/rdma/rdmavt_qp.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 7fdba92d4c05..e31502107a58 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -158,6 +158,7 @@ struct rvt_driver_params { u32 max_mad_size; u8 qos_shift; u8 max_rdma_atomic; + u8 reserved_operations; }; /* Protection domain */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index b0ab12b30f1e..56adcfcabe0b 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -144,6 +144,11 @@ #define RVT_PROCESS_OR_FLUSH_SEND \ (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND) +/* + * Internal send flags + */ +#define RVT_SEND_RESERVE_USED IB_SEND_RESERVED_START + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored @@ -232,6 +237,7 @@ struct rvt_ack_entry { #define RVT_OPERATION_ATOMIC 0x00000002 #define RVT_OPERATION_ATOMIC_SGE 0x00000004 #define RVT_OPERATION_LOCAL 0x00000008 +#define RVT_OPERATION_USE_RESERVE 0x00000010 #define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1) @@ -328,6 +334,7 @@ struct rvt_qp { u32 s_next_psn; /* PSN for next request */ u32 s_avail; /* number of entries avail */ u32 s_ssn; /* SSN of tail entry */ + atomic_t s_reserved_used; /* reserved entries in use */ spinlock_t s_lock ____cacheline_aligned_in_smp; u32 s_flags; @@ -459,6 +466,49 @@ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) rq->max_sge * sizeof(struct ib_sge)) * n); } +/** + * rvt_qp_wqe_reserve - reserve operation + * @qp - the rvt qp + * @wqe - the send wqe + * + * This routine used in post send to record + * a wqe relative reserved operation use. + */ +static inline void rvt_qp_wqe_reserve( + struct rvt_qp *qp, + struct rvt_swqe *wqe) +{ + wqe->wr.send_flags |= RVT_SEND_RESERVE_USED; + atomic_inc(&qp->s_reserved_used); +} + +/** + * rvt_qp_wqe_unreserve - clean reserved operation + * @qp - the rvt qp + * @wqe - the send wqe + * + * This decrements the reserve use count. + * + * This call MUST precede the change to + * s_last to insure that post send sees a stable + * s_avail. + * + * An smp_mp__after_atomic() is used to insure + * the compiler does not juggle the order of the s_last + * ring index and the decrementing of s_reserved_used. + */ +static inline void rvt_qp_wqe_unreserve( + struct rvt_qp *qp, + struct rvt_swqe *wqe) +{ + if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) { + wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED; + atomic_dec(&qp->s_reserved_used); + /* insure no compiler re-order up to s_last change */ + smp_mb__after_atomic(); + } +} + extern const int ib_rvt_state_ops[]; struct rvt_dev_info; -- cgit v1.2.3 From d9b13c203003cfb78c1f216049a204d385ccaeff Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Mon, 25 Jul 2016 13:39:45 -0700 Subject: IB/rdmavt, hfi1: Fix NFSoRDMA failure with FRMR enabled Hanging has been observed while writing a file over NFSoRDMA. Dmesg on the server contains messages like these: [ 931.992501] svcrdma: Error -22 posting RDMA_READ [ 952.076879] svcrdma: Error -22 posting RDMA_READ [ 982.154127] svcrdma: Error -22 posting RDMA_READ [ 1012.235884] svcrdma: Error -22 posting RDMA_READ [ 1042.319194] svcrdma: Error -22 posting RDMA_READ Here is why: With the base memory management extension enabled, FRMR is used instead of FMR. The xprtrdma server issues each RDMA read request as the following bundle: (1)IB_WR_REG_MR, signaled; (2)IB_WR_RDMA_READ, signaled; (3)IB_WR_LOCAL_INV, signaled & fencing. These requests are signaled. In order to generate completion, the fast register work request is processed by the hfi1 send engine after being posted to the work queue, and the corresponding lkey is not valid until the request is processed. However, the rdmavt driver validates lkey when the RDMA read request is posted and thus it fails immediately with error -EINVAL (-22). This patch changes the work flow of local operations (fast register and local invalidate) so that fast register work requests are always processed immediately to ensure that the corresponding lkey is valid when subsequent work requests are posted. Local invalidate requests are processed immediately if fencing is not required and no previous local invalidate request is pending. To allow completion generation for signaled local operations that have been processed before posting to the work queue, an internal send flag RVT_SEND_COMPLETION_ONLY is added. The hfi1 send engine checks this flag and only generates completion for such requests. Reviewed-by: Mike Marciniszyn Signed-off-by: Jianxin Xiong Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 56adcfcabe0b..13902dd319a9 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -148,6 +148,7 @@ * Internal send flags */ #define RVT_SEND_RESERVE_USED IB_SEND_RESERVED_START +#define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1) /* * Send work request queue entry. -- cgit v1.2.3 From fe508272c963d62de4183c32b6883c3d54c557ef Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 27 Jul 2016 21:07:36 -0400 Subject: IB/rdmavt: Eliminate redundant opcode test in mr ref clear The use of the specific opcode test is redundant since all ack entry users correctly manipulate the mr pointer to selectively trigger the reference clearing. The overly specific test hinders the use of implementation specific operations. The change needs to get rid of the union to insure that an atomic value is not seen as an MR pointer. Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 13902dd319a9..bd34d0b56bf7 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -222,14 +222,12 @@ struct rvt_mmap_info { * to send a RDMA read response or atomic operation. */ struct rvt_ack_entry { - u8 opcode; - u8 sent; + struct rvt_sge rdma_sge; + u64 atomic_data; u32 psn; u32 lpsn; - union { - struct rvt_sge rdma_sge; - u64 atomic_data; - }; + u8 opcode; + u8 sent; }; #define RC_QP_SCALING_INTERVAL 5 -- cgit v1.2.3 From c49298026908a8ce9dcf01ed68734ad171cef98b Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 27 Jul 2016 21:08:42 -0400 Subject: IB/hfi1: Allow for non-double word multiple message sizes for user SDMA The driver pads non-double word multiple message sizes but it doesn't account for this padding when the packet length is calculated. Also, the data length is miscalculated for message sizes less than 4 bytes due to the bit representation in LRH. And there's a check for non-double word multiple message sizes that prevents these messages from being sent. This patch fixes length miscalculations and enables the functionality to send non-double word multiple message sizes. Reviewed-by: Harish Chegondi Signed-off-by: Sebastian Sanchez Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- include/uapi/rdma/hfi/hfi1_user.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index 98bebf8bef55..d15e7289d835 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -75,7 +75,7 @@ * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define HFI1_USER_SWMINOR 1 +#define HFI1_USER_SWMINOR 2 /* * We will encode the major/minor inside a single 32bit version number. -- cgit v1.2.3