OSDN Git Service

xprtrdma: Fix max_send_wr computation
authorChuck Lever <chuck.lever@oracle.com>
Fri, 4 May 2018 19:34:48 +0000 (15:34 -0400)
committerAnna Schumaker <Anna.Schumaker@Netapp.com>
Mon, 7 May 2018 13:20:03 +0000 (09:20 -0400)
For FRWR, the computation of max_send_wr is split between
frwr_op_open and rpcrdma_ep_create, which makes it difficult to tell
that the max_send_wr result is currently incorrect if frwr_op_open
has to reduce the credit limit to accommodate a small max_qp_wr.
This is a problem now that extra WRs are needed for backchannel
operations and a drain CQE.

So, refactor the computation so that it is all done in ->ro_open,
and fix the FRWR version of this computation so that it
accommodates HCAs with small max_qp_wr correctly.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
net/sunrpc/xprtrdma/fmr_ops.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/verbs.c

index 5cc68a8..592d1e8 100644 (file)
@@ -159,10 +159,32 @@ out_release:
        fmr_op_release_mr(mr);
 }
 
+/* On success, sets:
+ *     ep->rep_attr.cap.max_send_wr
+ *     ep->rep_attr.cap.max_recv_wr
+ *     cdata->max_requests
+ *     ia->ri_max_segs
+ */
 static int
 fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
            struct rpcrdma_create_data_internal *cdata)
 {
+       int max_qp_wr;
+
+       max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+       max_qp_wr -= RPCRDMA_BACKWARD_WRS;
+       max_qp_wr -= 1;
+       if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
+               return -ENOMEM;
+       if (cdata->max_requests > max_qp_wr)
+               cdata->max_requests = max_qp_wr;
+       ep->rep_attr.cap.max_send_wr = cdata->max_requests;
+       ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
+       ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
+       ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+       ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+       ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
+
        ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
                                RPCRDMA_MAX_FMR_SGES);
        return 0;
index c5743a0..0f2e108 100644 (file)
@@ -205,12 +205,22 @@ out_release:
        frwr_op_release_mr(mr);
 }
 
+/* On success, sets:
+ *     ep->rep_attr.cap.max_send_wr
+ *     ep->rep_attr.cap.max_recv_wr
+ *     cdata->max_requests
+ *     ia->ri_max_segs
+ *
+ * And these FRWR-related fields:
+ *     ia->ri_max_frwr_depth
+ *     ia->ri_mrtype
+ */
 static int
 frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
             struct rpcrdma_create_data_internal *cdata)
 {
        struct ib_device_attr *attrs = &ia->ri_device->attrs;
-       int depth, delta;
+       int max_qp_wr, depth, delta;
 
        ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
        if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
@@ -244,14 +254,26 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
                } while (delta > 0);
        }
 
-       ep->rep_attr.cap.max_send_wr *= depth;
-       if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) {
-               cdata->max_requests = attrs->max_qp_wr / depth;
+       max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+       max_qp_wr -= RPCRDMA_BACKWARD_WRS;
+       max_qp_wr -= 1;
+       if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
+               return -ENOMEM;
+       if (cdata->max_requests > max_qp_wr)
+               cdata->max_requests = max_qp_wr;
+       ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
+       if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
+               cdata->max_requests = max_qp_wr / depth;
                if (!cdata->max_requests)
                        return -EINVAL;
                ep->rep_attr.cap.max_send_wr = cdata->max_requests *
                                               depth;
        }
+       ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
+       ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
+       ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+       ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+       ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
 
        ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
                                ia->ri_max_frwr_depth);
index 07529ef..62badde 100644 (file)
@@ -501,8 +501,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                  struct rpcrdma_create_data_internal *cdata)
 {
        struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
-       unsigned int max_qp_wr, max_sge;
        struct ib_cq *sendcq, *recvcq;
+       unsigned int max_sge;
        int rc;
 
        max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
@@ -513,29 +513,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        }
        ia->ri_max_send_sges = max_sge;
 
-       if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
-               dprintk("RPC:       %s: insufficient wqe's available\n",
-                       __func__);
-               return -ENOMEM;
-       }
-       max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1;
-
-       /* check provider's send/recv wr limits */
-       if (cdata->max_requests > max_qp_wr)
-               cdata->max_requests = max_qp_wr;
+       rc = ia->ri_ops->ro_open(ia, ep, cdata);
+       if (rc)
+               return rc;
 
        ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
        ep->rep_attr.qp_context = ep;
        ep->rep_attr.srq = NULL;
-       ep->rep_attr.cap.max_send_wr = cdata->max_requests;
-       ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
-       ep->rep_attr.cap.max_send_wr += 1;      /* drain cqe */
-       rc = ia->ri_ops->ro_open(ia, ep, cdata);
-       if (rc)
-               return rc;
-       ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
-       ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
-       ep->rep_attr.cap.max_recv_wr += 1;      /* drain cqe */
        ep->rep_attr.cap.max_send_sge = max_sge;
        ep->rep_attr.cap.max_recv_sge = 1;
        ep->rep_attr.cap.max_inline_data = 0;