// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* Work Requests exploiting Infiniband API
*
* Work requests (WRs) are submitted via ib_post_send() or ib_post_recv()
* to the RC SQ or RC RQ (reliably connected send/receive queue),
* respectively, and become work queue entries (WQEs).
* While an SQ WR/WQE is pending, we track it until transmission completion.
* Through a send or receive completion queue (CQ) respectively,
* we get completion queue entries (CQEs) [aka work completions (WCs)].
* Since the CQ callback is called from IRQ context, we split work by using
* bottom halves implemented by tasklets.
*
* SMC uses this to exchange LLC (link layer control)
* and CDC (connection data control) messages.
*
* Copyright IBM Corp. 2016
*
* Author(s): Steffen Maier <maier@linux.vnet.ibm.com>
*/
#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <asm/div64.h>
#include "smc.h"
#include "smc_wr.h"
#define SMC_WR_MAX_POLL_CQE 10 /* max. # of compl. queue elements in 1 poll */
/* number of hash bits for smc_wr_rx_hash, i.e. 2^4 = 16 buckets */
#define SMC_WR_RX_HASH_BITS 4
/* file-local hash table; judging by its name it holds receive-side entries,
 * but its users are not visible in this part of the file - TODO confirm
 */
static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
/* presumably serializes updates of smc_wr_rx_hash - verify against users */
static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);
/* control data for a pending send request
 *
 * Used for the entries of link->wr_tx_pends[] and for link->wr_tx_v2_pend.
 * The send completion path copies the entry to the stack and zeroes the
 * original before the handler is invoked.
 */
struct smc_wr_tx_pend {
u64 wr_id; /* work request id sent; compared against wc->wr_id */
smc_wr_tx_handler handler; /* optional callback, called on completion */
enum ib_wc_status wc_status; /* CQE status */
struct smc_link *link; /* presumably the link the WR was posted on - confirm */
u32 idx; /* presumably the send slot index of this entry - confirm */
struct smc_wr_tx_pend_priv priv; /* passed to handler as first argument */
u8 compl_requested; /* non-zero: complete link->wr_tx_compl[] entry on CQE */
};
/******************************** send queue *********************************/
/*------------------------------- completion --------------------------------*/
/* check the tx bitmap of the given link: any set bit among the first
 * link->wr_tx_cnt bits means a tx work request is still pending
 */
static inline bool smc_wr_is_tx_pend(struct smc_link *link)
{
	if (bitmap_empty(link->wr_tx_mask, link->wr_tx_cnt))
		return false;
	return true;
}
/* sleep on link->wr_tx_wait until the tx bitmap of the given link is empty,
 * i.e. until no tx work request is pending any more
 */
void smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
{
	wait_event(link->wr_tx_wait,
		   bitmap_empty(link->wr_tx_mask, link->wr_tx_cnt));
}
/* find the send slot whose pending entry carries the given work request id
 *
 * Returns the index of the first matching entry in link->wr_tx_pends[],
 * or link->wr_tx_cnt if none of the entries matches.
 */
static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
{
	u32 slot = 0;

	while (slot < link->wr_tx_cnt) {
		if (link->wr_tx_pends[slot].wr_id == wr_id)
			return slot;
		slot++;
	}
	/* no match: report the "one past the end" index */
	return link->wr_tx_cnt;
}
static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
{
struct smc_wr_tx_pend pnd_snd;
struct smc_link *link;
u32 pnd_snd_idx;
link = wc->qp->qp_context;
if (wc->opcode == IB_WC_REG_MR) {
if (wc->status)
link->wr_reg_state = FAILED;
else
link->wr_reg_state = CONFIRMED;
smc_wr_wakeup_reg_wait(link);
return;
}
pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
if (pnd_snd_idx == link->wr_tx_cnt) {
if (link->lgr->smc_version != SMC_V2 ||
link->wr_tx_v2_pend->wr_id != wc->wr_id)
return;
link->wr_tx_v2_pend->wc_status = wc->status;
memcpy(&pnd_snd, link->wr_tx_v2_pend, sizeof(pnd_snd));
/* clear the full struct smc_wr_tx_pend including .priv */
memset(link->wr_tx_v2_pend, 0,
sizeof(*link->wr_tx_v2_pend));
memset(link->lgr->wr_tx_buf_v2, 0,
sizeof(*link->lgr->wr_tx_buf_v2));
} else {
link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
complete(&link->wr_tx_compl[pnd_snd_idx]);
memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx],
sizeof(pnd_snd));
/* clear the full struct smc_wr_tx_pend including .priv */
memset(&link->wr_tx_pends[pnd_snd_idx], 0,
sizeof(link->wr_tx_pends[pnd_snd_idx]));
memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
sizeof(link->wr_tx_bufs[pnd_snd_idx]));
if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
return;
}
if (wc->status) {
if (link->lgr->smc_version == SMC_V2) {
memset(link->wr_tx_v2_pend, 0,
sizeof(*link->wr_tx_v2_pend));
memset(link->lgr->wr_tx_buf_v2, 0,
sizeof(*link->lgr->wr_tx_buf_v2));
}
/* terminate link */
smcr_link_down_cond_sched(link);
}
if (pnd_snd.handler)
pnd_snd.handler(&pnd_snd.priv, link, wc->status);
wake_up(&link->wr_tx_wait);
}
static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t)
{
struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet);
struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
int i = 0, rc;
int polled = 0;
again:
polled++;
do {
memset(&wc, 0, sizeof(wc));
rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
if (polled == 1) {
ib_req_notify_cq(dev->roce_cq