X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=net%2Fsunrpc%2Fxprtrdma%2Frpc_rdma.c;h=63bf0119f949a7a5509379146e5c20859f9f64df;hb=655fec6987b;hp=31a434d2f14360691d1eb07009165d667eb34bcc;hpb=c8b920bb49939a5c6cf1d2d819300f318ea050d2;p=uclinux-h8%2Flinux.git diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 31a434d2f143..63bf0119f949 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -53,14 +53,6 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif -enum rpcrdma_chunktype { - rpcrdma_noch = 0, - rpcrdma_readch, - rpcrdma_areadch, - rpcrdma_writech, - rpcrdma_replych -}; - static const char transfertypes[][12] = { "inline", /* no chunks */ "read list", /* some argument via rdma read */ @@ -157,42 +149,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; } -static int -rpcrdma_tail_pullup(struct xdr_buf *buf) -{ - size_t tlen = buf->tail[0].iov_len; - size_t skip = tlen & 3; - - /* Do not include the tail if it is only an XDR pad */ - if (tlen < 4) - return 0; - - /* xdr_write_pages() adds a pad at the beginning of the tail - * if the content in "buf->pages" is unaligned. Force the - * tail's actual content to land at the next XDR position - * after the head instead. - */ - if (skip) { - unsigned char *src, *dst; - unsigned int count; - - src = buf->tail[0].iov_base; - dst = buf->head[0].iov_base; - dst += buf->head[0].iov_len; - - src += skip; - tlen -= skip; - - dprintk("RPC: %s: skip=%zu, memmove(%p, %p, %zu)\n", - __func__, skip, dst, src, tlen); - - for (count = tlen; count; count--) - *dst++ = *src++; - } - - return tlen; -} - /* Split "vec" on page boundaries into segments. FMR registers pages, * not a byte range. Other modes coalesce these segments into a single * MR when they can. @@ -503,74 +459,184 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, return iptr; } -/* - * Copy write data inline. - * This function is used for "small" requests. Data which is passed - * to RPC via iovecs (or page list) is copied directly into the - * pre-registered memory buffer for this request. For small amounts - * of data, this is efficient. The cutoff value is tunable. +/* Prepare the RPC-over-RDMA header SGE. */ -static void rpcrdma_inline_pullup(struct rpc_rqst *rqst) +static bool +rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, + u32 len) { - int i, npages, curlen; - int copy_len; - unsigned char *srcp, *destp; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); - int page_base; - struct page **ppages; + struct rpcrdma_regbuf *rb = req->rl_rdmabuf; + struct ib_sge *sge = &req->rl_send_sge[0]; + + if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) { + if (!__rpcrdma_dma_map_regbuf(ia, rb)) + return false; + sge->addr = rdmab_addr(rb); + sge->lkey = rdmab_lkey(rb); + } + sge->length = len; - destp = rqst->rq_svec[0].iov_base; - curlen = rqst->rq_svec[0].iov_len; - destp += curlen; + ib_dma_sync_single_for_device(ia->ri_device, sge->addr, + sge->length, DMA_TO_DEVICE); + req->rl_send_wr.num_sge++; + return true; +} - dprintk("RPC: %s: destp 0x%p len %d hdrlen %d\n", - __func__, destp, rqst->rq_slen, curlen); +/* Prepare the Send SGEs. The head and tail iovec, and each entry + * in the page list, gets its own SGE. + */ +static bool +rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, + struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) +{ + unsigned int sge_no, page_base, len, remaining; + struct rpcrdma_regbuf *rb = req->rl_sendbuf; + struct ib_device *device = ia->ri_device; + struct ib_sge *sge = req->rl_send_sge; + u32 lkey = ia->ri_pd->local_dma_lkey; + struct page *page, **ppages; + + /* The head iovec is straightforward, as it is already + * DMA-mapped. Sync the content that has changed. + */ + if (!rpcrdma_dma_map_regbuf(ia, rb)) + return false; + sge_no = 1; + sge[sge_no].addr = rdmab_addr(rb); + sge[sge_no].length = xdr->head[0].iov_len; + sge[sge_no].lkey = rdmab_lkey(rb); + ib_dma_sync_single_for_device(device, sge[sge_no].addr, + sge[sge_no].length, DMA_TO_DEVICE); + + /* If there is a Read chunk, the page list is being handled + * via explicit RDMA, and thus is skipped here. However, the + * tail iovec may include an XDR pad for the page list, as + * well as additional content, and may not reside in the + * same page as the head iovec. + */ + if (rtype == rpcrdma_readch) { + len = xdr->tail[0].iov_len; - copy_len = rqst->rq_snd_buf.page_len; + /* Do not include the tail if it is only an XDR pad */ + if (len < 4) + goto out; - if (rqst->rq_snd_buf.tail[0].iov_len) { - curlen = rqst->rq_snd_buf.tail[0].iov_len; - if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) { - memmove(destp + copy_len, - rqst->rq_snd_buf.tail[0].iov_base, curlen); - r_xprt->rx_stats.pullup_copy_count += curlen; + page = virt_to_page(xdr->tail[0].iov_base); + page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; + + /* If the content in the page list is an odd length, + * xdr_write_pages() has added a pad at the beginning + * of the tail iovec. Force the tail's non-pad content + * to land at the next XDR position in the Send message. + */ + page_base += len & 3; + len -= len & 3; + goto map_tail; + } + + /* If there is a page list present, temporarily DMA map + * and prepare an SGE for each page to be sent. + */ + if (xdr->page_len) { + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + page_base = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + sge_no++; + if (sge_no > RPCRDMA_MAX_SEND_SGES - 2) + goto out_mapping_overflow; + + len = min_t(u32, PAGE_SIZE - page_base, remaining); + sge[sge_no].addr = ib_dma_map_page(device, *ppages, + page_base, len, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(device, sge[sge_no].addr)) + goto out_mapping_err; + sge[sge_no].length = len; + sge[sge_no].lkey = lkey; + + req->rl_mapped_sges++; + ppages++; + remaining -= len; + page_base = 0; } - dprintk("RPC: %s: tail destp 0x%p len %d\n", - __func__, destp + copy_len, curlen); - rqst->rq_svec[0].iov_len += curlen; } - r_xprt->rx_stats.pullup_copy_count += copy_len; - page_base = rqst->rq_snd_buf.page_base; - ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT); - page_base &= ~PAGE_MASK; - npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT; - for (i = 0; copy_len && i < npages; i++) { - curlen = PAGE_SIZE - page_base; - if (curlen > copy_len) - curlen = copy_len; - dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", - __func__, i, destp, copy_len, curlen); - srcp = kmap_atomic(ppages[i]); - memcpy(destp, srcp+page_base, curlen); - kunmap_atomic(srcp); - rqst->rq_svec[0].iov_len += curlen; - destp += curlen; - copy_len -= curlen; - page_base = 0; + /* The tail iovec is not always constructed in the same + * page where the head iovec resides (see, for example, + * gss_wrap_req_priv). To neatly accommodate that case, + * DMA map it separately. + */ + if (xdr->tail[0].iov_len) { + page = virt_to_page(xdr->tail[0].iov_base); + page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; + len = xdr->tail[0].iov_len; + +map_tail: + sge_no++; + sge[sge_no].addr = ib_dma_map_page(device, page, + page_base, len, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(device, sge[sge_no].addr)) + goto out_mapping_err; + sge[sge_no].length = len; + sge[sge_no].lkey = lkey; + req->rl_mapped_sges++; } - /* header now contains entire send message */ + +out: + req->rl_send_wr.num_sge = sge_no + 1; + return true; + +out_mapping_overflow: + pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); + return false; + +out_mapping_err: + pr_err("rpcrdma: Send mapping error\n"); + return false; +} + +bool +rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, + u32 hdrlen, struct xdr_buf *xdr, + enum rpcrdma_chunktype rtype) +{ + req->rl_send_wr.num_sge = 0; + req->rl_mapped_sges = 0; + + if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen)) + goto out_map; + + if (rtype != rpcrdma_areadch) + if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype)) + goto out_map; + + return true; + +out_map: + pr_err("rpcrdma: failed to DMA map a Send buffer\n"); + return false; +} + +void +rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +{ + struct ib_device *device = ia->ri_device; + struct ib_sge *sge; + int count; + + sge = &req->rl_send_sge[2]; + for (count = req->rl_mapped_sges; count--; sge++) + ib_dma_unmap_page(device, sge->addr, sge->length, + DMA_TO_DEVICE); + req->rl_mapped_sges = 0; } /* * Marshal a request: the primary job of this routine is to choose * the transfer modes. See comments below. * - * Prepares up to two IOVs per Call message: - * - * [0] -- RPC RDMA header - * [1] -- the RPC header/data - * * Returns zero on success, otherwise a negative errno. */ @@ -638,12 +704,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) */ if (rpcrdma_args_inline(r_xprt, rqst)) { rtype = rpcrdma_noch; - rpcrdma_inline_pullup(rqst); - rpclen = rqst->rq_svec[0].iov_len; + rpclen = rqst->rq_snd_buf.len; } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { rtype = rpcrdma_readch; - rpclen = rqst->rq_svec[0].iov_len; - rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf); + rpclen = rqst->rq_snd_buf.head[0].iov_len + + rqst->rq_snd_buf.tail[0].iov_len; } else { r_xprt->rx_stats.nomsg_call_count++; headerp->rm_type = htonl(RDMA_NOMSG); @@ -685,47 +750,21 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) goto out_unmap; hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; - if (hdrlen + rpclen > r_xprt->rx_data.inline_wsize) - goto out_overflow; - dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n", rqst->rq_task->tk_pid, __func__, transfertypes[rtype], transfertypes[wtype], hdrlen, rpclen); - if (!rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, req->rl_rdmabuf)) - goto out_map; - req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); - req->rl_send_iov[0].length = hdrlen; - req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); - - req->rl_send_wr.num_sge = 1; - if (rtype == rpcrdma_areadch) - return 0; - - if (!rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, req->rl_sendbuf)) - goto out_map; - req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); - req->rl_send_iov[1].length = rpclen; - req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); - - req->rl_send_wr.num_sge = 2; - + if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen, + &rqst->rq_snd_buf, rtype)) { + iptr = ERR_PTR(-EIO); + goto out_unmap; + } return 0; -out_overflow: - pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s/%s\n", - hdrlen, rpclen, transfertypes[rtype], transfertypes[wtype]); - iptr = ERR_PTR(-EIO); - out_unmap: r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); return PTR_ERR(iptr); - -out_map: - pr_err("rpcrdma: failed to DMA map a Send buffer\n"); - iptr = ERR_PTR(-EIO); - goto out_unmap; } /*