OSDN Git Service

IB/hfi1: Add OPFN helper functions for TID RDMA feature
authorKaike Wan <kaike.wan@intel.com>
Thu, 24 Jan 2019 03:20:42 +0000 (19:20 -0800)
committerDoug Ledford <dledford@redhat.com>
Thu, 31 Jan 2019 16:36:05 +0000 (11:36 -0500)
This patch adds the OPFN helper functions to initialize, encode, decode,
and reset OPFN parameters for the TID RDMA feature.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/opfn.h
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/tid_rdma.h
drivers/infiniband/hw/hfi1/verbs.h

index b443642..4d40311 100644 (file)
@@ -5222,6 +5222,17 @@ int is_bx(struct hfi1_devdata *dd)
        return (chip_rev_minor & 0xF0) == 0x10;
 }
 
+/* return true is kernel urg disabled for rcd */
+bool is_urg_masked(struct hfi1_ctxtdata *rcd)
+{
+       u64 mask;
+       u32 is = IS_RCVURGENT_START + rcd->ctxt;
+       u8 bit = is % 64;
+
+       mask = read_csr(rcd->dd, CCE_INT_MASK + (8 * (is / 64)));
+       return !(mask & BIT_ULL(bit));
+}
+
 /*
  * Append string s to buffer buf.  Arguments curp and len are the current
  * position and remaining length, respectively.
index 6b9c8f1..ba3d99e 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _CHIP_H
 #define _CHIP_H
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -804,6 +804,7 @@ void clear_linkup_counters(struct hfi1_devdata *dd);
 u32 hdrqempty(struct hfi1_ctxtdata *rcd);
 int is_ax(struct hfi1_devdata *dd);
 int is_bx(struct hfi1_devdata *dd);
+bool is_urg_masked(struct hfi1_ctxtdata *rcd);
 u32 read_physical_state(struct hfi1_devdata *dd);
 u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
 const char *opa_lstate_name(u32 lstate);
index 7835eb5..2ba5a2a 100644 (file)
@@ -927,6 +927,8 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
                lastfail = hfi1_create_rcvhdrq(dd, rcd);
                if (!lastfail)
                        lastfail = hfi1_setup_eagerbufs(rcd);
+               if (!lastfail)
+                       lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
                if (lastfail) {
                        dd_dev_err(dd,
                                   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
index 1a2b344..1927c98 100644 (file)
@@ -50,4 +50,9 @@
 /* STL Verbs Extended */
 #define IB_BTHE_E_SHIFT           24
 
+struct hfi1_opfn_data {
+       /* serialize opfn function calls */
+       spinlock_t lock;
+};
+
 #endif /* _HFI1_OPFN_H */
index da1ecb6..a8fd66f 100644 (file)
@@ -8,6 +8,208 @@
 #include "verbs.h"
 #include "tid_rdma.h"
 
+/*
+ * J_KEY for kernel contexts when TID RDMA is used.
+ * See generate_jkey() in hfi.h for more information.
+ */
+#define TID_RDMA_JKEY                   32
+#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
+#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)
+
+#define TID_RDMA_MAX_READ_SEGS_PER_REQ  6
+#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
+
+#define TID_OPFN_QP_CTXT_MASK 0xff
+#define TID_OPFN_QP_CTXT_SHIFT 56
+#define TID_OPFN_QP_KDETH_MASK 0xff
+#define TID_OPFN_QP_KDETH_SHIFT 48
+#define TID_OPFN_MAX_LEN_MASK 0x7ff
+#define TID_OPFN_MAX_LEN_SHIFT 37
+#define TID_OPFN_TIMEOUT_MASK 0x1f
+#define TID_OPFN_TIMEOUT_SHIFT 32
+#define TID_OPFN_RESERVED_MASK 0x3f
+#define TID_OPFN_RESERVED_SHIFT 26
+#define TID_OPFN_URG_MASK 0x1
+#define TID_OPFN_URG_SHIFT 25
+#define TID_OPFN_VER_MASK 0x7
+#define TID_OPFN_VER_SHIFT 22
+#define TID_OPFN_JKEY_MASK 0x3f
+#define TID_OPFN_JKEY_SHIFT 16
+#define TID_OPFN_MAX_READ_MASK 0x3f
+#define TID_OPFN_MAX_READ_SHIFT 10
+#define TID_OPFN_MAX_WRITE_MASK 0x3f
+#define TID_OPFN_MAX_WRITE_SHIFT 4
+
+/*
+ * OPFN TID layout
+ *
+ * 63               47               31               15
+ * NNNNNNNNKKKKKKKK MMMMMMMMMMMTTTTT DDDDDDUVVVJJJJJJ RRRRRRWWWWWWCCCC
+ * 3210987654321098 7654321098765432 1098765432109876 5432109876543210
+ * N - the context Number
+ * K - the Kdeth_qp
+ * M - Max_len
+ * T - Timeout
+ * D - reserveD
+ * V - version
+ * U - Urg capable
+ * J - Jkey
+ * R - max_Read
+ * W - max_Write
+ * C - Capcode
+ */
+
+static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
+{
+       return
+               (((u64)p->qp & TID_OPFN_QP_CTXT_MASK) <<
+                       TID_OPFN_QP_CTXT_SHIFT) |
+               ((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) <<
+                       TID_OPFN_QP_KDETH_SHIFT) |
+               (((u64)((p->max_len >> PAGE_SHIFT) - 1) &
+                       TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) |
+               (((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) <<
+                       TID_OPFN_TIMEOUT_SHIFT) |
+               (((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) |
+               (((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) |
+               (((u64)p->max_read & TID_OPFN_MAX_READ_MASK) <<
+                       TID_OPFN_MAX_READ_SHIFT) |
+               (((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) <<
+                       TID_OPFN_MAX_WRITE_SHIFT);
+}
+
+static void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data)
+{
+       p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) &
+               TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT;
+       p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK;
+       p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) &
+               TID_OPFN_MAX_WRITE_MASK;
+       p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) &
+               TID_OPFN_MAX_READ_MASK;
+       p->qp =
+               ((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK)
+                       << 16) |
+               ((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK));
+       p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK;
+       p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK;
+}
+
+void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+
+       p->qp = (kdeth_qp << 16) | priv->rcd->ctxt;
+       p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
+       p->jkey = priv->rcd->jkey;
+       p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
+       p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ;
+       p->timeout = qp->timeout;
+       p->urg = is_urg_masked(priv->rcd);
+}
+
+bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+
+       *data = tid_rdma_opfn_encode(&priv->tid_rdma.local);
+       return true;
+}
+
+bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+       struct tid_rdma_params *remote, *old;
+       bool ret = true;
+
+       old = rcu_dereference_protected(priv->tid_rdma.remote,
+                                       lockdep_is_held(&priv->opfn.lock));
+       data &= ~0xfULL;
+       /*
+        * If data passed in is zero, return true so as not to continue the
+        * negotiation process
+        */
+       if (!data || !HFI1_CAP_IS_KSET(TID_RDMA))
+               goto null;
+       /*
+        * If kzalloc fails, return false. This will result in:
+        * * at the requester a new OPFN request being generated to retry
+        *   the negotiation
+        * * at the responder, 0 being returned to the requester so as to
+        *   disable TID RDMA at both the requester and the responder
+        */
+       remote = kzalloc(sizeof(*remote), GFP_ATOMIC);
+       if (!remote) {
+               ret = false;
+               goto null;
+       }
+
+       tid_rdma_opfn_decode(remote, data);
+       priv->tid_timer_timeout_jiffies =
+               usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) /
+                                  1000UL) << 3) * 7);
+       rcu_assign_pointer(priv->tid_rdma.remote, remote);
+       /*
+        * A TID RDMA READ request's segment size is not equal to
+        * remote->max_len only when the request's data length is smaller
+        * than remote->max_len. In that case, there will be only one segment.
+        * Therefore, when priv->pkts_ps is used to calculate req->cur_seg
+        * during retry, it will lead to req->cur_seg = 0, which is exactly
+        * what is expected.
+        */
+       priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len);
+       priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1;
+       goto free;
+null:
+       RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
+       priv->timeout_shift = 0;
+free:
+       if (old)
+               kfree_rcu(old, rcu_head);
+       return ret;
+}
+
+bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data)
+{
+       bool ret;
+
+       ret = tid_rdma_conn_reply(qp, *data);
+       *data = 0;
+       /*
+        * If tid_rdma_conn_reply() returns error, set *data as 0 to indicate
+        * TID RDMA could not be enabled. This will result in TID RDMA being
+        * disabled at the requester too.
+        */
+       if (ret)
+               (void)tid_rdma_conn_req(qp, data);
+       return ret;
+}
+
+void tid_rdma_conn_error(struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+       struct tid_rdma_params *old;
+
+       old = rcu_dereference_protected(priv->tid_rdma.remote,
+                                       lockdep_is_held(&priv->opfn.lock));
+       RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
+       if (old)
+               kfree_rcu(old, rcu_head);
+}
+
+/* This is called at context initialization time */
+int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
+{
+       if (reinit)
+               return 0;
+
+       BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY);
+       BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
+       rcd->jkey = TID_RDMA_JKEY;
+       hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
+       return 0;
+}
+
 /**
  * qp_to_rcd - determine the receive context used by a qp
  * @qp - the qp
index 6fcd3ad..18c6d43 100644 (file)
@@ -6,8 +6,34 @@
 #ifndef HFI1_TID_RDMA_H
 #define HFI1_TID_RDMA_H
 
+#define TID_RDMA_MAX_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
+
+struct tid_rdma_params {
+       struct rcu_head rcu_head;
+       u32 qp;
+       u32 max_len;
+       u16 jkey;
+       u8 max_read;
+       u8 max_write;
+       u8 timeout;
+       u8 urg;
+       u8 version;
+};
+
+struct tid_rdma_qp_params {
+       struct tid_rdma_params local;
+       struct tid_rdma_params __rcu *remote;
+};
+
+bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
+bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
+bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
+void tid_rdma_conn_error(struct rvt_qp *qp);
+void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);
+
+int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
+
 int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                      struct ib_qp_init_attr *init_attr);
 
 #endif /* HFI1_TID_RDMA_H */
-
index 8834119..c8baa1e 100644 (file)
@@ -161,8 +161,13 @@ struct hfi1_qp_priv {
        struct hfi1_ctxtdata *rcd;                /* QP's receive context */
        u8 s_sc;                                  /* SC[0..4] for next packet */
        struct iowait s_iowait;
+       struct hfi1_opfn_data opfn;
+       struct tid_rdma_qp_params tid_rdma;
        struct rvt_qp *owner;
        u8 hdr_type; /* 9B or 16B */
+       unsigned long tid_timer_timeout_jiffies;
+       u16 pkts_ps;            /* packets per segment */
+       u8 timeout_shift;       /* account for number of packets per segment */
 };
 
 /*