OSDN Git Service

xprtrdma: Remove MEMWINDOWS registration modes
authorChuck Lever <chuck.lever@oracle.com>
Wed, 28 May 2014 14:32:34 +0000 (10:32 -0400)
committerAnna Schumaker <Anna.Schumaker@Netapp.com>
Wed, 4 Jun 2014 12:56:37 +0000 (08:56 -0400)
The MEMWINDOWS and MEMWINDOWS_ASYNC memory registration modes were
intended as stop-gap modes before the introduction of FRMR. They
are now considered obsolete.

MEMWINDOWS_ASYNC is also considered unsafe because it can leave
client memory registered and exposed for an indeterminant time after
each I/O.

At this point, the MEMWINDOWS modes add needless complexity, so
remove them.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h

index 02b2941..46b5172 100644 (file)
@@ -199,7 +199,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
                return 0;
 
        do {
-               /* bind/register the memory, then build chunk from result. */
                int n = rpcrdma_register_external(seg, nsegs,
                                                cur_wchunk != NULL, r_xprt);
                if (n <= 0)
@@ -698,16 +697,6 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
 }
 
 /*
- * This function is called when memory window unbind which we are waiting
- * for completes. Just use rr_func (zeroed by upcall) to signal completion.
- */
-static void
-rpcrdma_unbind_func(struct rpcrdma_rep *rep)
-{
-       wake_up(&rep->rr_unbind);
-}
-
-/*
  * Called as a tasklet to do req/reply match and complete a request
  * Errors must result in the RPC task either being awakened, or
  * allowed to timeout, to discover the errors at that time.
@@ -721,7 +710,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
        struct rpc_xprt *xprt = rep->rr_xprt;
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
        __be32 *iptr;
-       int i, rdmalen, status;
+       int rdmalen, status;
 
        /* Check status. If bad, signal disconnect and return rep to pool */
        if (rep->rr_len == ~0U) {
@@ -850,27 +839,6 @@ badheader:
                break;
        }
 
-       /* If using mw bind, start the deregister process now. */
-       /* (Note: if mr_free(), cannot perform it here, in tasklet context) */
-       if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
-       case RPCRDMA_MEMWINDOWS:
-               for (i = 0; req->rl_nchunks-- > 1;)
-                       i += rpcrdma_deregister_external(
-                               &req->rl_segments[i], r_xprt, NULL);
-               /* Optionally wait (not here) for unbinds to complete */
-               rep->rr_func = rpcrdma_unbind_func;
-               (void) rpcrdma_deregister_external(&req->rl_segments[i],
-                                                  r_xprt, rep);
-               break;
-       case RPCRDMA_MEMWINDOWS_ASYNC:
-               for (i = 0; req->rl_nchunks--;)
-                       i += rpcrdma_deregister_external(&req->rl_segments[i],
-                                                        r_xprt, NULL);
-               break;
-       default:
-               break;
-       }
-
        dprintk("RPC:       %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
                        __func__, xprt, rqst, status);
        xprt_complete_rqst(rqst->rq_task, status);
index 8c5035a..c23b0c1 100644 (file)
@@ -566,9 +566,7 @@ xprt_rdma_free(void *buffer)
                __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
 
        /*
-        * Finish the deregistration. When using mw bind, this was
-        * begun in rpcrdma_reply_handler(). In all other modes, we
-        * do it here, in thread context. The process is considered
+        * Finish the deregistration.  The process is considered
         * complete when the rr_func vector becomes NULL - this
         * was put in place during rpcrdma_reply_handler() - the wait
         * call below will not block if the dereg is "done". If
@@ -580,11 +578,6 @@ xprt_rdma_free(void *buffer)
                        &req->rl_segments[i], r_xprt, NULL);
        }
 
-       if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
-               rep->rr_func = NULL;    /* abandon the callback */
-               req->rl_reply = NULL;
-       }
-
        if (req->rl_iov.length == 0) {  /* see allocate above */
                struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
                oreq->rl_reply = req->rl_reply;
index 4a4e4ea..304c7ad 100644 (file)
@@ -152,7 +152,7 @@ void rpcrdma_event_process(struct ib_wc *wc)
        dprintk("RPC:       %s: event rep %p status %X opcode %X length %u\n",
                __func__, rep, wc->status, wc->opcode, wc->byte_len);
 
-       if (!rep) /* send or bind completion that we don't care about */
+       if (!rep) /* send completion that we don't care about */
                return;
 
        if (IB_WC_SUCCESS != wc->status) {
@@ -197,8 +197,6 @@ void rpcrdma_event_process(struct ib_wc *wc)
                        }
                        atomic_set(&rep->rr_buffer->rb_credits, credits);
                }
-               /* fall through */
-       case IB_WC_BIND_MW:
                rpcrdma_schedule_tasklet(rep);
                break;
        default:
@@ -233,7 +231,7 @@ rpcrdma_cq_poll(struct ib_cq *cq)
 /*
  * rpcrdma_cq_event_upcall
  *
- * This upcall handles recv, send, bind and unbind events.
+ * This upcall handles recv and send events.
  * It is reentrant but processes single events in order to maintain
  * ordering of receives to keep server credits.
  *
@@ -494,16 +492,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
        }
 
        switch (memreg) {
-       case RPCRDMA_MEMWINDOWS:
-       case RPCRDMA_MEMWINDOWS_ASYNC:
-               if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
-                       dprintk("RPC:       %s: MEMWINDOWS registration "
-                               "specified but not supported by adapter, "
-                               "using slower RPCRDMA_REGISTER\n",
-                               __func__);
-                       memreg = RPCRDMA_REGISTER;
-               }
-               break;
        case RPCRDMA_MTHCAFMR:
                if (!ia->ri_id->device->alloc_fmr) {
 #if RPCRDMA_PERSISTENT_REGISTRATION
@@ -567,16 +555,13 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                                IB_ACCESS_REMOTE_READ;
                goto register_setup;
 #endif
-       case RPCRDMA_MEMWINDOWS_ASYNC:
-       case RPCRDMA_MEMWINDOWS:
-               mem_priv = IB_ACCESS_LOCAL_WRITE |
-                               IB_ACCESS_MW_BIND;
-               goto register_setup;
        case RPCRDMA_MTHCAFMR:
                if (ia->ri_have_dma_lkey)
                        break;
                mem_priv = IB_ACCESS_LOCAL_WRITE;
+#if RPCRDMA_PERSISTENT_REGISTRATION
        register_setup:
+#endif
                ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
                if (IS_ERR(ia->ri_bind_mem)) {
                        printk(KERN_ALERT "%s: ib_get_dma_mr for "
@@ -699,14 +684,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                }
                break;
        }
-       case RPCRDMA_MEMWINDOWS_ASYNC:
-       case RPCRDMA_MEMWINDOWS:
-               /* Add room for mw_binds+unbinds - overkill! */
-               ep->rep_attr.cap.max_send_wr++;
-               ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS);
-               if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
-                       return -EINVAL;
-               break;
        default:
                break;
        }
@@ -728,14 +705,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
        /* set trigger for requesting send completion */
        ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /*  - 1*/;
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_MEMWINDOWS_ASYNC:
-       case RPCRDMA_MEMWINDOWS:
-               ep->rep_cqinit -= RPCRDMA_MAX_SEGS;
-               break;
-       default:
-               break;
-       }
        if (ep->rep_cqinit <= 2)
                ep->rep_cqinit = 0;
        INIT_CQCOUNT(ep);
@@ -743,11 +712,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        init_waitqueue_head(&ep->rep_connect_wait);
        INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
 
-       /*
-        * Create a single cq for receive dto and mw_bind (only ever
-        * care about unbind, really). Send completions are suppressed.
-        * Use single threaded tasklet upcalls to maintain ordering.
-        */
        ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
                                  rpcrdma_cq_async_error_upcall, NULL,
                                  ep->rep_attr.cap.max_recv_wr +
@@ -1020,11 +984,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
                len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
                                sizeof(struct rpcrdma_mw);
                break;
-       case RPCRDMA_MEMWINDOWS_ASYNC:
-       case RPCRDMA_MEMWINDOWS:
-               len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
-                               sizeof(struct rpcrdma_mw);
-               break;
        default:
                break;
        }
@@ -1055,11 +1014,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
        }
        p += cdata->padding;
 
-       /*
-        * Allocate the fmr's, or mw's for mw_bind chunk registration.
-        * We "cycle" the mw's in order to minimize rkey reuse,
-        * and also reduce unbind-to-bind collision.
-        */
        INIT_LIST_HEAD(&buf->rb_mws);
        r = (struct rpcrdma_mw *)p;
        switch (ia->ri_memreg_strategy) {
@@ -1107,21 +1061,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
                        ++r;
                }
                break;
-       case RPCRDMA_MEMWINDOWS_ASYNC:
-       case RPCRDMA_MEMWINDOWS:
-               /* Allocate one extra request's worth, for full cycling */
-               for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
-                       r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1);
-                       if (IS_ERR(r->r.mw)) {
-                               rc = PTR_ERR(r->r.mw);
-                               dprintk("RPC:       %s: ib_alloc_mw"
-                                       " failed %i\n", __func__, rc);
-                               goto out;
-                       }
-                       list_add(&r->mw_list, &buf->rb_mws);
-                       ++r;
-               }
-               break;
        default:
                break;
        }
@@ -1170,7 +1109,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
                memset(rep, 0, sizeof(struct rpcrdma_rep));
                buf->rb_recv_bufs[i] = rep;
                buf->rb_recv_bufs[i]->rr_buffer = buf;
-               init_waitqueue_head(&rep->rr_unbind);
 
                rc = rpcrdma_register_internal(ia, rep->rr_base,
                                len - offsetof(struct rpcrdma_rep, rr_base),
@@ -1204,7 +1142,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 
        /* clean up in reverse order from create
         *   1.  recv mr memory (mr free, then kfree)
-        *   1a. bind mw memory
         *   2.  send mr memory (mr free, then kfree)
         *   3.  padding (if any) [moved to rpcrdma_ep_destroy]
         *   4.  arrays
@@ -1248,15 +1185,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
                                        " failed %i\n",
                                        __func__, rc);
                        break;
-               case RPCRDMA_MEMWINDOWS_ASYNC:
-               case RPCRDMA_MEMWINDOWS:
-                       rc = ib_dealloc_mw(r->r.mw);
-                       if (rc)
-                               dprintk("RPC:       %s:"
-                                       " ib_dealloc_mw"
-                                       " failed %i\n",
-                                       __func__, rc);
-                       break;
                default:
                        break;
                }
@@ -1331,15 +1259,12 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
        req->rl_niovs = 0;
        if (req->rl_reply) {
                buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
-               init_waitqueue_head(&req->rl_reply->rr_unbind);
                req->rl_reply->rr_func = NULL;
                req->rl_reply = NULL;
        }
        switch (ia->ri_memreg_strategy) {
        case RPCRDMA_FRMR:
        case RPCRDMA_MTHCAFMR:
-       case RPCRDMA_MEMWINDOWS_ASYNC:
-       case RPCRDMA_MEMWINDOWS:
                /*
                 * Cycle mw's back in reverse order, and "spin" them.
                 * This delays and scrambles reuse as much as possible.
@@ -1384,8 +1309,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
 
 /*
  * Put reply buffers back into pool when not attached to
- * request. This happens in error conditions, and when
- * aborting unbinds. Pre-decrement counter/array index.
+ * request. This happens in error conditions.
  */
 void
 rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
@@ -1688,74 +1612,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
 }
 
 static int
-rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
-                       int *nsegs, int writing, struct rpcrdma_ia *ia,
-                       struct rpcrdma_xprt *r_xprt)
-{
-       int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
-                                 IB_ACCESS_REMOTE_READ);
-       struct ib_mw_bind param;
-       int rc;
-
-       *nsegs = 1;
-       rpcrdma_map_one(ia, seg, writing);
-       param.bind_info.mr = ia->ri_bind_mem;
-       param.wr_id = 0ULL;     /* no send cookie */
-       param.bind_info.addr = seg->mr_dma;
-       param.bind_info.length = seg->mr_len;
-       param.send_flags = 0;
-       param.bind_info.mw_access_flags = mem_priv;
-
-       DECR_CQCOUNT(&r_xprt->rx_ep);
-       rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
-       if (rc) {
-               dprintk("RPC:       %s: failed ib_bind_mw "
-                       "%u@0x%llx status %i\n",
-                       __func__, seg->mr_len,
-                       (unsigned long long)seg->mr_dma, rc);
-               rpcrdma_unmap_one(ia, seg);
-       } else {
-               seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
-               seg->mr_base = param.bind_info.addr;
-               seg->mr_nsegs = 1;
-       }
-       return rc;
-}
-
-static int
-rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
-                       struct rpcrdma_ia *ia,
-                       struct rpcrdma_xprt *r_xprt, void **r)
-{
-       struct ib_mw_bind param;
-       LIST_HEAD(l);
-       int rc;
-
-       BUG_ON(seg->mr_nsegs != 1);
-       param.bind_info.mr = ia->ri_bind_mem;
-       param.bind_info.addr = 0ULL;    /* unbind */
-       param.bind_info.length = 0;
-       param.bind_info.mw_access_flags = 0;
-       if (*r) {
-               param.wr_id = (u64) (unsigned long) *r;
-               param.send_flags = IB_SEND_SIGNALED;
-               INIT_CQCOUNT(&r_xprt->rx_ep);
-       } else {
-               param.wr_id = 0ULL;
-               param.send_flags = 0;
-               DECR_CQCOUNT(&r_xprt->rx_ep);
-       }
-       rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
-       rpcrdma_unmap_one(ia, seg);
-       if (rc)
-               dprintk("RPC:       %s: failed ib_(un)bind_mw,"
-                       " status %i\n", __func__, rc);
-       else
-               *r = NULL;      /* will upcall on completion */
-       return rc;
-}
-
-static int
 rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
                        int *nsegs, int writing, struct rpcrdma_ia *ia)
 {
@@ -1845,12 +1701,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
                rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
                break;
 
-       /* Registration using memory windows */
-       case RPCRDMA_MEMWINDOWS_ASYNC:
-       case RPCRDMA_MEMWINDOWS:
-               rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
-               break;
-
        /* Default registration each time */
        default:
                rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
@@ -1887,11 +1737,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
                rc = rpcrdma_deregister_fmr_external(seg, ia);
                break;
 
-       case RPCRDMA_MEMWINDOWS_ASYNC:
-       case RPCRDMA_MEMWINDOWS:
-               rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
-               break;
-
        default:
                rc = rpcrdma_deregister_default_external(seg, ia);
                break;
index c620d13..bf08ee0 100644 (file)
@@ -127,7 +127,6 @@ struct rpcrdma_rep {
        struct rpc_xprt *rr_xprt;       /* needed for request/reply matching */
        void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
        struct list_head rr_list;       /* tasklet list */
-       wait_queue_head_t rr_unbind;    /* optional unbind wait */
        struct ib_sge   rr_iov;         /* for posting */
        struct ib_mr    *rr_handle;     /* handle for mem in rr_iov */
        char    rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
@@ -162,7 +161,6 @@ struct rpcrdma_mr_seg {             /* chunk descriptors */
                struct ib_mr    *rl_mr;         /* if registered directly */
                struct rpcrdma_mw {             /* if registered from region */
                        union {
-                               struct ib_mw    *mw;
                                struct ib_fmr   *fmr;
                                struct {
                                        struct ib_fast_reg_page_list *fr_pgl;