OSDN Git Service

IB/hfi1: Integrate OPFN into RC transactions
author: Kaike Wan <kaike.wan@intel.com>
Thu, 24 Jan 2019 03:21:11 +0000 (19:21 -0800)
committer: Doug Ledford <dledford@redhat.com>
Thu, 31 Jan 2019 16:37:34 +0000 (11:37 -0500)
OPFN parameter negotiation allows a pair of connected RC QPs to exchange
a set of parameters in succession. This negotiation does not commence
until the first ULP request. Because OPFN operations are
private to the driver, they do not generate user completions or put the
QP into error when they run out of retries. This patch integrates the
OPFN protocol into the transactions of an RC QP.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/tid_rdma.h
drivers/infiniband/hw/hfi1/verbs.c

index 09c898d..a8dbd0f 100644 (file)
@@ -1498,6 +1498,12 @@ static int __init hfi1_mod_init(void)
        /* sanitize link CRC options */
        link_crc_mask &= SUPPORTED_CRCS;
 
+       ret = opfn_init();
+       if (ret < 0) {
+               pr_err("Failed to allocate opfn_wq");
+               goto bail_dev;
+       }
+
        /*
         * These must be called before the driver is registered with
         * the PCI subsystem.
@@ -1528,6 +1534,7 @@ module_init(hfi1_mod_init);
 static void __exit hfi1_mod_cleanup(void)
 {
        pci_unregister_driver(&hfi1_pci_driver);
+       opfn_exit();
        node_affinity_destroy_all();
        hfi1_dbg_exit();
 
index 5344e89..f822f92 100644 (file)
@@ -132,6 +132,12 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
        .qpt_support = BIT(IB_QPT_RC),
 },
 
+[IB_WR_OPFN] = {
+       .length = sizeof(struct ib_atomic_wr),
+       .qpt_support = BIT(IB_QPT_RC),
+       .flags = RVT_OPERATION_USE_RESERVE,
+},
+
 };
 
 static void flush_list_head(struct list_head *l)
@@ -285,6 +291,8 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
                qp_set_16b(qp);
        }
+
+       opfn_qp_init(qp, attr, attr_mask);
 }
 
 /**
@@ -696,6 +704,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 {
        struct hfi1_qp_priv *priv = qp->priv;
 
+       hfi1_qp_priv_tid_free(rdi, qp);
        kfree(priv->s_ahg);
        kfree(priv);
 }
@@ -751,6 +760,10 @@ void notify_qp_reset(struct rvt_qp *qp)
 {
        qp->r_adefered = 0;
        clear_ahg(qp);
+
+       /* Clear any OPFN state */
+       if (qp->ibqp.qp_type == IB_QPT_RC)
+               opfn_conn_error(qp);
 }
 
 /*
index 8970fc7..092d5eb 100644 (file)
 /* cut down ridiculously long IB macro names */
 #define OP(x) RC_OP(x)
 
+static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+                                        struct rvt_swqe *wqe,
+                                        struct hfi1_ibport *ibp);
+
 static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
                       u32 psn, u32 pmtu)
 {
@@ -517,10 +521,14 @@ no_flow_control:
                                        goto bail;
                                }
                                qp->s_num_rd_atomic++;
-                               if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
-                                       qp->s_lsn++;
                        }
-                       if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+
+                       /* FALLTHROUGH */
+               case IB_WR_OPFN:
+                       if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
+                               qp->s_lsn++;
+                       if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+                           wqe->wr.opcode == IB_WR_OPFN) {
                                qp->s_state = OP(COMPARE_SWAP);
                                put_ib_ateth_swap(wqe->atomic_wr.swap,
                                                  &ohdr->u.atomic_eth);
@@ -1040,6 +1048,7 @@ done:
  */
 void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 {
+       struct hfi1_qp_priv *priv = qp->priv;
        struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
        struct hfi1_ibport *ibp;
 
@@ -1050,8 +1059,26 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
                        hfi1_migrate_qp(qp);
                        qp->s_retry = qp->s_retry_cnt;
                } else if (qp->s_last == qp->s_acked) {
-                       rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-                       rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+                       /*
+                        * We need special handling for the OPFN request WQEs as
+                        * they are not allowed to generate real user errors
+                        */
+                       if (wqe->wr.opcode == IB_WR_OPFN) {
+                               struct hfi1_ibport *ibp =
+                                       to_iport(qp->ibqp.device, qp->port_num);
+                               /*
+                                * Call opfn_conn_reply() with capcode and
+                                * remaining data as 0 to close out the
+                                * current request
+                                */
+                               opfn_conn_reply(qp, priv->opfn.curr);
+                               wqe = do_rc_completion(qp, wqe, ibp);
+                               qp->s_flags &= ~RVT_S_WAIT_ACK;
+                       } else {
+                               rvt_send_complete(qp, wqe,
+                                                 IB_WC_RETRY_EXC_ERR);
+                               rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+                       }
                        return;
                } else { /* need to handle delayed completion */
                        return;
@@ -1363,6 +1390,9 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
                        u64 *vaddr = wqe->sg_list[0].vaddr;
                        *vaddr = val;
                }
+               if (wqe->wr.opcode == IB_WR_OPFN)
+                       opfn_conn_reply(qp, val);
+
                if (qp->s_num_rd_atomic &&
                    (wqe->wr.opcode == IB_WR_RDMA_READ ||
                     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@@ -2068,6 +2098,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
                return;
 
        fecn = process_ecn(qp, packet);
+       opfn_trigger_conn_request(qp, be32_to_cpu(ohdr->bth[1]));
 
        /*
         * Process responses (ACKs) before anything else.  Note that the
@@ -2363,15 +2394,18 @@ send_last:
 
        case OP(COMPARE_SWAP):
        case OP(FETCH_ADD): {
-               struct ib_atomic_eth *ateth;
+               struct ib_atomic_eth *ateth = &ohdr->u.atomic_eth;
+               u64 vaddr = get_ib_ateth_vaddr(ateth);
+               bool opfn = opcode == OP(COMPARE_SWAP) &&
+                       vaddr == HFI1_VERBS_E_ATOMIC_VADDR;
                struct rvt_ack_entry *e;
-               u64 vaddr;
                atomic64_t *maddr;
                u64 sdata;
                u32 rkey;
                u8 next;
 
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+                            !opfn))
                        goto nack_inv;
                next = qp->r_head_ack_queue + 1;
                if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
@@ -2387,8 +2421,11 @@ send_last:
                        rvt_put_mr(e->rdma_sge.mr);
                        e->rdma_sge.mr = NULL;
                }
-               ateth = &ohdr->u.atomic_eth;
-               vaddr = get_ib_ateth_vaddr(ateth);
+               /* Process OPFN special virtual address */
+               if (opfn) {
+                       opfn_conn_response(qp, e, ateth);
+                       goto ack;
+               }
                if (unlikely(vaddr & (sizeof(u64) - 1)))
                        goto nack_inv_unlck;
                rkey = be32_to_cpu(ateth->rkey);
@@ -2407,6 +2444,7 @@ send_last:
                                      sdata);
                rvt_put_mr(qp->r_sge.sge.mr);
                qp->r_sge.num_sge = 0;
+ack:
                e->opcode = opcode;
                e->sent = 0;
                e->psn = psn;
index a8fd66f..0c9f313 100644 (file)
@@ -246,5 +246,16 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 
        qpriv->rcd = qp_to_rcd(rdi, qp);
 
+       spin_lock_init(&qpriv->opfn.lock);
+       INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
+
        return 0;
 }
+
+void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+
+       if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA))
+               cancel_work_sync(&priv->opfn.opfn_work);
+}
index 18c6d43..ee81515 100644 (file)
@@ -35,5 +35,6 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
 
 int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                      struct ib_qp_init_attr *init_attr);
+void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
 
 #endif /* HFI1_TID_RDMA_H */
index ec3899c..571bfd5 100644 (file)
@@ -1735,6 +1735,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
        dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
        dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
        dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
+       dd->verbs_dev.rdi.dparms.reserved_operations = 1;
        dd->verbs_dev.rdi.dparms.extra_rdma_atomic = 1;
 
        /* post send table */