From: Trond Myklebust Date: Mon, 25 Feb 2019 13:39:26 +0000 (-0500) Subject: Merge tag 'nfs-rdma-for-5.1-1' of git://git.linux-nfs.org/projects/anna/linux-nfs X-Git-Tag: v5.1-rc1~10^2~42 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=06b5fc3ad94eebf25d5abc07f84e16b8b33dcf8c;p=uclinux-h8%2Flinux.git Merge tag 'nfs-rdma-for-5.1-1' of git://git.linux-nfs.org/projects/anna/linux-nfs NFSoRDMA client updates for 5.1 New features: - Convert rpc auth layer to use xdr_streams - Config option to disable insecure enctypes - Reduce size of RPC receive buffers Bugfixes and cleanups: - Fix sparse warnings - Check inline size before providing a write chunk - Reduce the receive doorbell rate - Various tracepoint improvements [Trond: Fix up merge conflicts] Signed-off-by: Trond Myklebust --- 06b5fc3ad94eebf25d5abc07f84e16b8b33dcf8c diff --cc net/sunrpc/xprtsock.c index 8fa74c8a4fd8,ae09d850cd11..53de72d2dded --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@@ -742,33 -696,95 +742,56 @@@ xs_stream_start_connect(struct sock_xpr #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) +static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek) +{ + if (seek) + iov_iter_advance(&msg->msg_iter, seek); + return sock_sendmsg(sock, msg); +} + +static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek) +{ + iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len); + return xs_sendmsg(sock, msg, seek); +} + +static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base) +{ + int err; + + err = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (err < 0) + return err; + + iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, + xdr_buf_pagecount(xdr), + xdr->page_len + xdr->page_base); + return xs_sendmsg(sock, msg, base + xdr->page_base); +} + ++#define xs_record_marker_len() sizeof(rpc_fraghdr) ++ + /* Common case: + * - stream transport + * - sending from byte 0 of the message + * - the message is wholly contained in @xdr's head iovec + */ -static int xs_send_rm_and_kvec(struct socket *sock, struct xdr_buf *xdr, - unsigned int remainder) ++static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg, ++ rpc_fraghdr marker, struct kvec *vec, size_t base) + { - struct msghdr msg = { - .msg_flags = XS_SENDMSG_FLAGS | (remainder ? MSG_MORE : 0) - }; - rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | - (u32)xdr->len); + struct kvec iov[2] = { - { ++ [0] = { + .iov_base = &marker, + .iov_len = sizeof(marker) + }, - { - .iov_base = xdr->head[0].iov_base, - .iov_len = xdr->head[0].iov_len - }, - }; - int ret; - - ret = kernel_sendmsg(sock, &msg, iov, 2, - iov[0].iov_len + iov[1].iov_len); - if (ret < 0) - return ret; - if (ret < iov[0].iov_len) - return -EPIPE; - return ret - iov[0].iov_len; -} - -static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) -{ - struct msghdr msg = { - .msg_name = addr, - .msg_namelen = addrlen, - .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0), - }; - struct kvec iov = { - .iov_base = vec->iov_base + base, - .iov_len = vec->iov_len - base, ++ [1] = *vec, + }; ++ size_t len = iov[0].iov_len + iov[1].iov_len; + - if (iov.iov_len != 0) - return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); - return kernel_sendmsg(sock, &msg, NULL, 0, 0); -} - -static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p) -{ - ssize_t (*do_sendpage)(struct socket *sock, struct page *page, - int offset, size_t size, int flags); - struct page **ppage; - unsigned int remainder; - int err; - - remainder = xdr->page_len - base; - base += xdr->page_base; - ppage = xdr->pages + (base >> PAGE_SHIFT); - base &= ~PAGE_MASK; - do_sendpage = sock->ops->sendpage; - if (!zerocopy) - do_sendpage = sock_no_sendpage; - for(;;) { - unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder); - int flags = XS_SENDMSG_FLAGS; - - remainder -= len; - if (more) - flags |= MSG_MORE; - if (remainder != 0) - flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE; - err = do_sendpage(sock, *ppage, base, len, flags); - if (remainder == 0 || err != len) - break; - *sent_p += err; - ppage++; - base = 0; - } - if (err > 0) { - *sent_p += err; - err = 0; - } - return err; ++ iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len); ++ return xs_sendmsg(sock, msg, base); + } + /** * xs_sendpages - write pages directly to a socket * @sock: socket to send on @@@ -776,34 -792,38 +799,42 @@@ * @addrlen: UDP only -- length of destination address * @xdr: buffer containing this request * @base: starting position in the buffer - * @zerocopy: true if it is safe to use sendpage() ++ * @rm: stream record marker field * @sent_p: return the total number of bytes successfully queued for sending * */ - static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, int *sent_p) -static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p) ++static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p) { - unsigned int remainder = xdr->len - base; + struct msghdr msg = { + .msg_name = addr, + .msg_namelen = addrlen, + .msg_flags = XS_SENDMSG_FLAGS | MSG_MORE, + }; - unsigned int remainder = xdr->len - base; ++ unsigned int rmsize = rm ? sizeof(rm) : 0; ++ unsigned int remainder = rmsize + xdr->len - base; ++ unsigned int want; int err = 0; - int sent = 0; if (unlikely(!sock)) return -ENOTSOCK; - if (base < xdr->head[0].iov_len) { - if (base != 0) { - addr = NULL; - addrlen = 0; - } - - if (base < xdr->head[0].iov_len || addr != NULL) { -- unsigned int len = xdr->head[0].iov_len - base; ++ want = xdr->head[0].iov_len + rmsize; ++ if (base < want) { ++ unsigned int len = want - base; remainder -= len; - if (!base && !addr) - err = xs_send_rm_and_kvec(sock, xdr, remainder); + if (remainder == 0) + msg.msg_flags &= ~MSG_MORE; - err = xs_send_kvec(sock, &msg, &xdr->head[0], base); ++ if (rmsize) ++ err = xs_send_rm_and_kvec(sock, &msg, rm, ++ &xdr->head[0], base); + else - err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], - base, remainder != 0); ++ err = xs_send_kvec(sock, &msg, &xdr->head[0], base); if (remainder == 0 || err != len) goto out; *sent_p += err; base = 0; } else -- base -= xdr->head[0].iov_len; ++ base -= want; if (base < xdr->page_len) { unsigned int len = xdr->page_len - base; @@@ -890,16 -907,6 +921,17 @@@ xs_send_request_was_aborted(struct sock return transport->xmit.offset != 0 && req->rq_bytes_sent == 0; } +/* - * Construct a stream transport record marker in @buf. ++ * Return the stream record marker field for a record of length < 2^31-1 + */ - static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) ++static rpc_fraghdr ++xs_stream_record_marker(struct xdr_buf *xdr) +{ - u32 reclen = buf->len - sizeof(rpc_fraghdr); - rpc_fraghdr *base = buf->head[0].iov_base; - *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen); ++ if (!xdr->len) ++ return 0; ++ return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len); +} + /** * xs_local_send_request - write an RPC request to an AF_LOCAL socket * @req: pointer to RPC request @@@ -934,7 -939,7 +964,8 @@@ static int xs_local_send_request(struc req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, NULL, 0, xdr, transport->xmit.offset, - true, &sent); ++ xs_stream_record_marker(xdr), + &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - transport->xmit.offset, status); @@@ -1001,7 -1007,7 +1032,7 @@@ static int xs_udp_send_request(struct r req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, - xdr, 0, &sent); - xdr, 0, true, &sent); ++ xdr, 0, 0, &sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len, status); @@@ -1093,7 -1104,7 +1122,8 @@@ static int xs_tcp_send_request(struct r sent = 0; status = xs_sendpages(transport->sock, NULL, 0, xdr, transport->xmit.offset, - zerocopy, &sent); ++ xs_stream_record_marker(xdr), + &sent); dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - transport->xmit.offset, status);