OSDN Git Service

IB/hfi1: OPFN interface
author: Kaike Wan <kaike.wan@intel.com>
Thu, 24 Jan 2019 03:20:52 +0000 (19:20 -0800)
committer: Doug Ledford <dledford@redhat.com>
Thu, 31 Jan 2019 16:36:05 +0000 (11:36 -0500)
OPFN allows a pair of connected RC QPs to exchange a set of parameters
in succession. The parameter exchange itself is done using the IB compare
and swap request with a special virtual address. The request is triggered
using a reserved IB work request opcode. This patch implements the OPFN
interface to initialize, start, process, and reset the OPFN request.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/Makefile
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/opfn.c [new file with mode: 0644]
drivers/infiniband/hw/hfi1/opfn.h

index 3ce9dc8..4044a8c 100644 (file)
@@ -24,6 +24,7 @@ hfi1-y := \
        mad.o \
        mmu_rb.o \
        msix.o \
+       opfn.o \
        pcie.o \
        pio.o \
        pio_copy.o \
index ddfcf2f..9aa0357 100644 (file)
@@ -99,6 +99,8 @@
 #define NEIGHBOR_TYPE_HFI              0
 #define NEIGHBOR_TYPE_SWITCH   1
 
+#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
+
 extern unsigned long hfi1_cap_mask;
 #define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap)
 #define HFI1_CAP_UGET_MASK(mask, cap) \
index 2ba5a2a..09c898d 100644 (file)
@@ -72,7 +72,6 @@
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
 
-#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
 /*
  * min buffers we want to have per context, after driver
  */
diff --git a/drivers/infiniband/hw/hfi1/opfn.c b/drivers/infiniband/hw/hfi1/opfn.c
new file mode 100644 (file)
index 0000000..2d46c91
--- /dev/null
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#include "hfi.h"
+#include "trace.h"
+#include "qp.h"
+#include "opfn.h"
+
+#define IB_BTHE_E                 BIT(IB_BTHE_E_SHIFT)
+
+#define OPFN_CODE(code) BIT((code) - 1)
+#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)
+
+struct hfi1_opfn_type {        /* callbacks for one OPFN capability code */
+       bool (*request)(struct rvt_qp *qp, u64 *data);  /* fill request data */
+       bool (*response)(struct rvt_qp *qp, u64 *data); /* answer peer request */
+       bool (*reply)(struct rvt_qp *qp, u64 data);     /* consume peer answer */
+       void (*error)(struct rvt_qp *qp);       /* invalidate negotiated state */
+};
+
+static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
+       [STL_VERBS_EXTD_TID_RDMA] = {   /* index is the capability code */
+               .request = tid_rdma_conn_req,
+               .response = tid_rdma_conn_resp,
+               .reply = tid_rdma_conn_reply,
+               .error = tid_rdma_conn_error,
+       },
+};     /* entries not listed here keep NULL callbacks */
+
+static struct workqueue_struct *opfn_wq;
+
+static void opfn_schedule_conn_request(struct rvt_qp *qp);
+
+/* Return true when the IB_BTHE_E bit is set in @bth1. */
+static bool hfi1_opfn_extended(u32 bth1)
+{
+       return (bth1 & IB_BTHE_E) != 0;
+}
+
+/*
+ * Post the next outstanding OPFN request for @qp, if any.
+ *
+ * The lowest-numbered capability that is requested but not yet completed is
+ * selected, its handler fills in the request data, and the result is posted
+ * as an IB_WR_OPFN atomic work request aimed at HFI1_VERBS_E_ATOMIC_VADDR.
+ * A capability that cannot be requested (no handler slot, no request
+ * callback, or the callback declined) is marked completed so it is not
+ * retried. If the post itself fails, the in-progress marker is cleared and
+ * another attempt is queued on the OPFN workqueue.
+ */
+static void opfn_conn_request(struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+       struct hfi1_opfn_type *handler;
+       struct ib_atomic_wr wr;
+       unsigned long flags;
+       u16 pending, capcode;
+       u64 data;
+
+       spin_lock_irqsave(&priv->opfn.lock, flags);
+
+       /* The peer has not advertised the extended bit */
+       if (!priv->opfn.extended)
+               goto unlock;
+       /* Nothing was requested */
+       if (!priv->opfn.requested)
+               goto unlock;
+       /* Every request has already completed */
+       if (priv->opfn.requested == priv->opfn.completed)
+               goto unlock;
+       /* A previous request is still in progress */
+       if (priv->opfn.curr)
+               goto unlock;
+
+       /* Lowest pending bit, converted back to its capability code */
+       pending = priv->opfn.requested & ~priv->opfn.completed;
+       capcode = ilog2(pending & ~(pending - 1)) + 1;
+       if (capcode >= STL_VERBS_EXTD_MAX)
+               goto mark_done;
+
+       handler = &hfi1_opfn_handlers[capcode];
+       if (!handler->request || !handler->request(qp, &data))
+               goto mark_done;
+
+       /* The low four bits of the payload carry the capability code */
+       data = (data & ~0xf) | capcode;
+
+       memset(&wr, 0, sizeof(wr));
+       wr.wr.opcode = IB_WR_OPFN;
+       wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
+       wr.compare_add = data;
+
+       /* Record the request as in progress, then post without opfn.lock */
+       priv->opfn.curr = capcode;
+       spin_unlock_irqrestore(&priv->opfn.lock, flags);
+
+       if (!ib_post_send(&qp->ibqp, &wr.wr, NULL))
+               return;
+
+       /* Unexpected post failure: forget the request and try again later */
+       spin_lock_irqsave(&priv->opfn.lock, flags);
+       priv->opfn.curr = STL_VERBS_EXTD_NONE;
+       opfn_schedule_conn_request(qp);
+       goto unlock;
+
+mark_done:
+       /* Mark it as done so we don't keep attempting to complete it */
+       priv->opfn.completed |= OPFN_CODE(capcode);
+unlock:
+       spin_unlock_irqrestore(&priv->opfn.lock, flags);
+}
+
+/*
+ * Work function form of opfn_conn_request(): map @work back to the QP
+ * private data that embeds it and issue the request for the owning QP.
+ */
+void opfn_send_conn_request(struct work_struct *work)
+{
+       struct hfi1_opfn_data *opfn_data =
+               container_of(work, struct hfi1_opfn_data, opfn_work);
+       struct hfi1_qp_priv *priv =
+               container_of(opfn_data, struct hfi1_qp_priv, opfn);
+
+       opfn_conn_request(priv->owner);
+}
+
+/*
+ * Defer the OPFN request for @qp to the OPFN workqueue.
+ *
+ * Callers that already hold the QP s_lock must use this instead of calling
+ * opfn_conn_request() directly, because ib_post_send() in that function
+ * would otherwise take s_lock a second time.
+ */
+static void opfn_schedule_conn_request(struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *qpriv = qp->priv;
+       struct work_struct *work = &qpriv->opfn.opfn_work;
+
+       queue_work(opfn_wq, work);
+}
+
+/*
+ * Build the response to an OPFN request received from the peer.
+ *
+ * The capability code is taken from the low four bits of the compare data
+ * in @ateth; codes outside the handler table are ignored. When the
+ * capability has no response callback, only the code is written to
+ * @e->atomic_data. Otherwise the callback updates the data, which is stored
+ * in @e->atomic_data with the code in the low four bits, and a true return
+ * marks the capability completed.
+ */
+void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
+                       struct ib_atomic_eth *ateth)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+       u64 data = be64_to_cpu(ateth->compare_data);
+       u8 capcode = data & 0xf;
+       struct hfi1_opfn_type *handler;
+       unsigned long flags;
+
+       if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
+               return;
+
+       handler = &hfi1_opfn_handlers[capcode];
+       if (!handler->response) {
+               e->atomic_data = capcode;
+               return;
+       }
+
+       spin_lock_irqsave(&priv->opfn.lock, flags);
+
+       /*
+        * A request for an already negotiated feature may mean that the
+        * other side has reset: drop the completed state first.
+        */
+       if (priv->opfn.completed & OPFN_CODE(capcode)) {
+               priv->opfn.completed &= ~OPFN_CODE(capcode);
+               if (handler->error)
+                       handler->error(qp);
+       }
+
+       if (handler->response(qp, &data))
+               priv->opfn.completed |= OPFN_CODE(capcode);
+       e->atomic_data = (data & ~0xf) | capcode;
+
+       spin_unlock_irqrestore(&priv->opfn.lock, flags);
+}
+
+/*
+ * Consume the peer's reply to the OPFN request currently in progress.
+ *
+ * @data carries the capability code in its low four bits. The reply is
+ * ignored unless that code matches opfn.curr. A true return from the reply
+ * callback marks the capability completed; in every matching case the
+ * in-progress marker is cleared.
+ */
+void opfn_conn_reply(struct rvt_qp *qp, u64 data)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+       u8 capcode = data & 0xf;
+       struct hfi1_opfn_type *handler;
+       unsigned long flags;
+
+       if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
+               return;
+
+       spin_lock_irqsave(&priv->opfn.lock, flags);
+
+       /* Only act when a request is pending and this reply belongs to it */
+       if (priv->opfn.curr && capcode == priv->opfn.curr) {
+               handler = &hfi1_opfn_handlers[capcode];
+               if (handler->reply && handler->reply(qp, data))
+                       priv->opfn.completed |= OPFN_CODE(capcode);
+
+               /* The previous request is no longer in progress */
+               priv->opfn.curr = STL_VERBS_EXTD_NONE;
+       }
+
+       spin_unlock_irqrestore(&priv->opfn.lock, flags);
+}
+
+/*
+ * Reset all OPFN state of @qp after the QP has gone into the Error state.
+ *
+ * Every completed capability is invalidated through its error callback (if
+ * it has one), then the extended/requested/in-progress state is cleared.
+ * The RC QP handling will clean the WQE for the connection request.
+ */
+void opfn_conn_error(struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+       struct hfi1_opfn_type *extd;
+       unsigned long flags;
+       u16 mask, capcode;
+
+       /*
+        * We have to invalidate all negotiated features, including the one
+        * in progress (if any).
+        */
+       spin_lock_irqsave(&priv->opfn.lock, flags);
+       while (priv->opfn.completed) {
+               /* Lowest completed bit and the capability code it encodes */
+               mask = priv->opfn.completed & ~(priv->opfn.completed - 1);
+               capcode = ilog2(mask) + 1;
+               /*
+                * opfn_conn_request() may mark codes beyond the handler
+                * table as completed; those have no handler to notify.
+                */
+               if (capcode < STL_VERBS_EXTD_MAX) {
+                       extd = &hfi1_opfn_handlers[capcode];
+                       if (extd->error)
+                               extd->error(qp);
+               }
+               /*
+                * Clear by mask: OPFN_CODE() expects a code, not a bit
+                * mask, and would leave bits above the second one set.
+                */
+               priv->opfn.completed &= ~mask;
+       }
+       priv->opfn.extended = 0;
+       priv->opfn.requested = 0;
+       priv->opfn.curr = STL_VERBS_EXTD_NONE;
+       spin_unlock_irqrestore(&priv->opfn.lock, flags);
+}
+
+/*
+ * Set up OPFN/TID RDMA state for @qp when its attributes are modified.
+ *
+ * Only RC QPs with the TID_RDMA capability enabled are affected. With a
+ * path MTU of 4096 or 8192 the local TID RDMA parameters are initialized;
+ * with any other MTU they are zeroed. The TID RDMA request bit is set only
+ * when @attr_mask and @attr describe a transition to RTS.
+ */
+void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+       struct tid_rdma_params *local = &priv->tid_rdma.local;
+       unsigned long flags;
+
+       spin_lock_irqsave(&priv->opfn.lock, flags);
+
+       if (qp->ibqp.qp_type != IB_QPT_RC || !HFI1_CAP_IS_KSET(TID_RDMA))
+               goto unlock;
+
+       if (qp->pmtu != enum_to_mtu(OPA_MTU_4096) &&
+           qp->pmtu != enum_to_mtu(OPA_MTU_8192)) {
+               memset(local, 0, sizeof(*local));
+               goto unlock;
+       }
+
+       tid_rdma_opfn_init(qp, local);
+
+       /* The requested bit is only set when the QP transitions to RTS */
+       if (!(attr_mask & IB_QP_STATE) || attr->qp_state != IB_QPS_RTS)
+               goto unlock;
+
+       priv->opfn.requested |= OPFN_MASK(TID_RDMA);
+
+       /*
+        * A completed bit that is already set means the QP is being moved
+        * *back* into RTS, so the TID RDMA parameters can be renegotiated.
+        * Since completed was set, it is safe to assume that opfn.extended
+        * is set as well.
+        */
+       if (priv->opfn.completed & OPFN_MASK(TID_RDMA)) {
+               priv->opfn.completed &= ~OPFN_MASK(TID_RDMA);
+               opfn_schedule_conn_request(qp);
+       }
+
+unlock:
+       spin_unlock_irqrestore(&priv->opfn.lock, flags);
+}
+
+/*
+ * Note that the peer set the extended bit in @bth1 and, the first time this
+ * is seen with the OPFN capability enabled, start the OPFN request right
+ * away if the QP is already in RTS.
+ */
+void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+
+       if (priv->opfn.extended)
+               return;
+       if (!hfi1_opfn_extended(bth1))
+               return;
+       if (!HFI1_CAP_IS_KSET(OPFN))
+               return;
+
+       priv->opfn.extended = 1;
+       if (qp->state == IB_QPS_RTS)
+               opfn_conn_request(qp);
+}
+
+/*
+ * Create the workqueue used for deferred OPFN requests.
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue could not be allocated.
+ */
+int opfn_init(void)
+{
+       opfn_wq = alloc_workqueue("hfi_opfn",
+                                 WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
+                                 WQ_MEM_RECLAIM,
+                                 HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
+
+       return opfn_wq ? 0 : -ENOMEM;
+}
+
+/* Destroy the OPFN workqueue, if it exists, and forget the pointer. */
+void opfn_exit(void)
+{
+       if (!opfn_wq)
+               return;
+
+       destroy_workqueue(opfn_wq);
+       opfn_wq = NULL;
+}
index 1927c98..5f2011c 100644 (file)
 
 /* STL Verbs Extended */
 #define IB_BTHE_E_SHIFT           24
+#define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX
+
+struct ib_atomic_eth;
+
+enum hfi1_opfn_codes {
+       STL_VERBS_EXTD_NONE = 0,        /* no request in progress */
+       STL_VERBS_EXTD_TID_RDMA,        /* TID RDMA parameter exchange */
+       STL_VERBS_EXTD_MAX      /* code count; sizes the handler table */
+};
 
 struct hfi1_opfn_data {
+       u8 extended;    /* set once the peer's extended BTH bit is seen */
+       u16 requested;  /* OPFN_CODE() bits of capabilities to negotiate */
+       u16 completed;  /* OPFN_CODE() bits negotiated or marked done */
+       enum hfi1_opfn_codes curr;      /* code of the request in progress */
        /* serialize opfn function calls */
        spinlock_t lock;
+       struct work_struct opfn_work;   /* queued on the OPFN workqueue */
 };
 
+/* WR opcode for OPFN */
+#define IB_WR_OPFN IB_WR_RESERVED3
+
+void opfn_send_conn_request(struct work_struct *work);
+void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
+                       struct ib_atomic_eth *ateth);
+void opfn_conn_reply(struct rvt_qp *qp, u64 data);
+void opfn_conn_error(struct rvt_qp *qp);
+void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask);
+void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1);
+int opfn_init(void);
+void opfn_exit(void);
+
 #endif /* _HFI1_OPFN_H */