From 122e5b9f3d370ae11e1502d14ff5c7ea9b144a76 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 18 Jun 2020 17:30:24 -0700
Subject: [PATCH] nvme-tcp: optimize network stack with setting msg flags
 according to batch size

If we have a long list of requests to send, signal the network stack
that more is coming (MSG_MORE). If we have nothing else, signal MSG_EOR.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 2d3962c164a4..b2e73e19ef01 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -79,6 +79,7 @@ struct nvme_tcp_queue {
 	struct mutex		send_mutex;
 	struct llist_head	req_list;
 	struct list_head	send_list;
+	bool			more_requests;
 
 	/* recv state */
 	void			*pdu;
@@ -277,7 +278,9 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 	 */
 	if (queue->io_cpu == smp_processor_id() &&
 	    sync && empty && mutex_trylock(&queue->send_mutex)) {
+		queue->more_requests = !last;
 		nvme_tcp_try_send(queue);
+		queue->more_requests = false;
 		mutex_unlock(&queue->send_mutex);
 	} else if (last) {
 		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
@@ -877,6 +880,12 @@ done:
 	read_unlock(&sk->sk_callback_lock);
 }
 
+static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
+{
+	return !list_empty(&queue->send_list) ||
+		!llist_empty(&queue->req_list) || queue->more_requests;
+}
+
 static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
 {
 	queue->request = NULL;
@@ -898,7 +907,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		bool last = nvme_tcp_pdu_last_send(req, len);
 		int ret, flags = MSG_DONTWAIT;
 
-		if (last && !queue->data_digest)
+		if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
 			flags |= MSG_EOR;
 		else
 			flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
@@ -945,7 +954,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 	int flags = MSG_DONTWAIT;
 	int ret;
 
-	if (inline_data)
+	if (inline_data || nvme_tcp_queue_more(queue))
 		flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
 	else
 		flags |= MSG_EOR;
@@ -1010,12 +1019,17 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
 	int ret;
-	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
+	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
 	struct kvec iov = {
 		.iov_base = &req->ddgst + req->offset,
 		.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
 	};
 
+	if (nvme_tcp_queue_more(queue))
+		msg.msg_flags |= MSG_MORE;
+	else
+		msg.msg_flags |= MSG_EOR;
+
 	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
 	if (unlikely(ret <= 0))
 		return ret;
-- 
2.11.0
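
For illustration only (not part of the patch): the MSG_MORE/MSG_EOR hinting
the patch applies inside the driver has the same shape in userspace socket
code. Below is a minimal sketch of the idea: intermediate sends in a batch
carry MSG_MORE so the stack may coalesce them into fewer segments, and the
last send carries MSG_EOR to end the batch. The address (127.0.0.1:9) and
the two-part header/payload message are assumptions made up for the demo;
point it at any TCP listener.

	/*
	 * Illustrative userspace sketch of MSG_MORE / MSG_EOR batching.
	 * Not the kernel code above; same flag semantics, via send(2).
	 */
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>

	int main(void)
	{
		struct sockaddr_in addr = {
			.sin_family = AF_INET,
			.sin_port   = htons(9),	/* demo port; an assumption */
		};
		const char hdr[] = "header";
		const char payload[] = "payload";
		int fd = socket(AF_INET, SOCK_STREAM, 0);

		if (fd < 0) {
			perror("socket");
			return 1;
		}
		inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);
		if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
			perror("connect");
			return 1;
		}

		/* More data follows: let the stack coalesce instead of
		 * pushing a small segment right away. */
		if (send(fd, hdr, sizeof(hdr) - 1, MSG_MORE) < 0)
			perror("send hdr");

		/* Last piece of the batch: MSG_EOR ends it, so the stack
		 * can push the accumulated data and won't merge it with
		 * later sends. */
		if (send(fd, payload, sizeof(payload) - 1, MSG_EOR) < 0)
			perror("send payload");

		close(fd);
		return 0;
	}

The patch makes the same trade-off per queued request: as long as
nvme_tcp_queue_more() sees pending entries on send_list/req_list (or
more_requests is set by the direct-send path), each PDU, data, or digest
send keeps MSG_MORE set; only the truly last send of the batch gets
MSG_EOR, letting TCP build larger segments for back-to-back requests.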