From dea262094cdf629a3380061722b205de04057799 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:27:25 -0700 Subject: [PATCH] IB/srpt: Change default behavior from using SRQ to using RC Although more resources are needed in the RC mode, that mode has three advantages over SRQ: - It works with all RDMA adapters, even those that do not support SRQ. - Posting WRs and polling WCs does not trigger lock contention because only one thread at a time accesses a WR or WC queue in non-SRQ mode. - The end-to-end flow control mechanism is used. From the IB spec: C9-150.2.1: For QPs that are not associated with an SRQ, each HCA receive queue shall generate end-to-end flow control credits. If a QP is associated with an SRQ, the HCA receive queue shall not generate end-to-end flow control credits. Add new configfs attributes that allow configuring which mode to use (/sys/kernel/config/target/srpt/$GUID/$GUID/attrib/use_srq). Note: only the attribute for port 1 is relevant on multi-port adapters. 
Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 152 ++++++++++++++++++++++++++-------- drivers/infiniband/ulp/srpt/ib_srpt.h | 6 ++ 2 files changed, 123 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 6cf95ad870cc..304855b9b537 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -295,6 +295,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, { struct srpt_device *sdev = sport->sdev; struct ib_dm_ioc_profile *iocp; + int send_queue_depth; iocp = (struct ib_dm_ioc_profile *)mad->data; @@ -310,6 +311,12 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, return; } + if (sdev->use_srq) + send_queue_depth = sdev->srq_size; + else + send_queue_depth = min(SRPT_RQ_SIZE, + sdev->device->attrs.max_qp_wr); + memset(iocp, 0, sizeof(*iocp)); strcpy(iocp->id_string, SRPT_ID_STRING); iocp->guid = cpu_to_be64(srpt_service_guid); @@ -322,7 +329,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); iocp->protocol = cpu_to_be16(SRP_PROTOCOL); iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION); - iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); + iocp->send_queue_depth = cpu_to_be16(send_queue_depth); iocp->rdma_read_depth = 4; iocp->send_size = cpu_to_be32(srp_max_req_size); iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size, @@ -686,6 +693,9 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, { int i; + if (!ioctx_ring) + return; + for (i = 0; i < ring_size; ++i) srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); kfree(ioctx_ring); @@ -757,7 +767,7 @@ static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx, /** * srpt_post_recv() - Post an IB receive request. 
*/ -static int srpt_post_recv(struct srpt_device *sdev, +static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *ioctx) { struct ib_sge list; @@ -774,7 +784,10 @@ static int srpt_post_recv(struct srpt_device *sdev, wr.sg_list = &list; wr.num_sge = 1; - return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); + if (sdev->use_srq) + return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); + else + return ib_post_recv(ch->qp, &wr, &bad_wr); } /** @@ -1517,7 +1530,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, break; } - srpt_post_recv(ch->sport->sdev, recv_ioctx); + srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); return; out_wait: @@ -1616,7 +1629,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) struct srpt_device *sdev = sport->sdev; const struct ib_device_attr *attrs = &sdev->device->attrs; u32 srp_sq_size = sport->port_attrib.srp_sq_size; - int ret; + int i, ret; WARN_ON(ch->rq_size < 1); @@ -1640,7 +1653,6 @@ retry: = (void(*)(struct ib_event *, void*))srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; - qp_init->srq = sdev->srq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; qp_init->qp_type = IB_QPT_RC; /* @@ -1654,6 +1666,12 @@ retry: qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; + if (sdev->use_srq) { + qp_init->srq = sdev->srq; + } else { + qp_init->cap.max_recv_wr = ch->rq_size; + qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge; + } ch->qp = ib_create_qp(sdev->pd, qp_init); if (IS_ERR(ch->qp)) { @@ -1669,6 +1687,10 @@ retry: goto err_destroy_cq; } + if (!sdev->use_srq) + for (i = 0; i < ch->rq_size; i++) + srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]); + atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr); pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n", @@ -1818,6 +1840,10 @@ static void srpt_release_channel_work(struct work_struct *w) ch->sport->sdev, 
ch->rq_size, ch->rsp_size, DMA_TO_DEVICE); + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, + sdev, ch->rq_size, + srp_max_req_size, DMA_FROM_DEVICE); + mutex_lock(&sdev->mutex); list_del_init(&ch->list); if (ch->release_done) @@ -1975,6 +2001,19 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch->ioctx_ring[i]->ch = ch; list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list); } + if (!sdev->use_srq) { + ch->ioctx_recv_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, + sizeof(*ch->ioctx_recv_ring[0]), + srp_max_req_size, + DMA_FROM_DEVICE); + if (!ch->ioctx_recv_ring) { + pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n"); + rej->reason = + cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + goto free_ring; + } + } ret = srpt_create_ch_ib(ch); if (ret) { @@ -1982,7 +2021,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because creating" " a new RDMA channel failed.\n"); - goto free_ring; + goto free_recv_ring; } ret = srpt_ch_qp_rtr(ch, ch->qp); @@ -2073,6 +2112,11 @@ release_channel: destroy_ib: srpt_destroy_ch_ib(ch); +free_recv_ring: + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, + ch->sport->sdev, ch->rq_size, + srp_max_req_size, DMA_FROM_DEVICE); + free_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, @@ -2502,20 +2546,38 @@ static void srpt_add_one(struct ib_device *device) srq_attr.attr.srq_limit = 0; srq_attr.srq_type = IB_SRQT_BASIC; - sdev->srq = ib_create_srq(sdev->pd, &srq_attr); - if (IS_ERR(sdev->srq)) - goto err_pd; + sdev->srq = sdev->port[0].port_attrib.use_srq ? + ib_create_srq(sdev->pd, &srq_attr) : ERR_PTR(-ENOTSUPP); + if (IS_ERR(sdev->srq)) { + pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(sdev->srq)); + + /* SRQ not supported. 
*/ + sdev->use_srq = false; + } else { + pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", + sdev->srq_size, sdev->device->attrs.max_srq_wr, + device->name); + + sdev->use_srq = true; - pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n", - __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr, - device->name); + sdev->ioctx_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(sdev, sdev->srq_size, + sizeof(*sdev->ioctx_ring[0]), + srp_max_req_size, + DMA_FROM_DEVICE); + if (!sdev->ioctx_ring) + goto err_pd; + + for (i = 0; i < sdev->srq_size; ++i) + srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]); + } if (!srpt_service_guid) srpt_service_guid = be64_to_cpu(device->node_guid); sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev); if (IS_ERR(sdev->cm_id)) - goto err_srq; + goto err_ring; /* print out target login information */ pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx," @@ -2535,16 +2597,6 @@ static void srpt_add_one(struct ib_device *device) srpt_event_handler); ib_register_event_handler(&sdev->event_handler); - sdev->ioctx_ring = (struct srpt_recv_ioctx **) - srpt_alloc_ioctx_ring(sdev, sdev->srq_size, - sizeof(*sdev->ioctx_ring[0]), - srp_max_req_size, DMA_FROM_DEVICE); - if (!sdev->ioctx_ring) - goto err_event; - - for (i = 0; i < sdev->srq_size; ++i) - srpt_post_recv(sdev, sdev->ioctx_ring[i]); - WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); for (i = 1; i <= sdev->device->phys_port_cnt; i++) { @@ -2554,12 +2606,13 @@ static void srpt_add_one(struct ib_device *device) sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE; sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE; sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; + sport->port_attrib.use_srq = false; INIT_WORK(&sport->work, srpt_refresh_port_work); if (srpt_refresh_port(sport)) { pr_err("MAD registration failed for %s-%d.\n", sdev->device->name, i); - goto err_ring; + goto err_event; } } @@ -2572,16 +2625,16 @@ out: 
pr_debug("added %s.\n", device->name); return; -err_ring: - srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, - sdev->srq_size, srp_max_req_size, - DMA_FROM_DEVICE); err_event: ib_unregister_event_handler(&sdev->event_handler); err_cm: ib_destroy_cm_id(sdev->cm_id); -err_srq: - ib_destroy_srq(sdev->srq); +err_ring: + if (sdev->use_srq) + ib_destroy_srq(sdev->srq); + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, srp_max_req_size, + DMA_FROM_DEVICE); err_pd: ib_dealloc_pd(sdev->pd); free_dev: @@ -2625,12 +2678,12 @@ static void srpt_remove_one(struct ib_device *device, void *client_data) spin_unlock(&srpt_dev_lock); srpt_release_sdev(sdev); - ib_destroy_srq(sdev->srq); - ib_dealloc_pd(sdev->pd); - + if (sdev->use_srq) + ib_destroy_srq(sdev->srq); srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); - sdev->ioctx_ring = NULL; + ib_dealloc_pd(sdev->pd); + kfree(sdev); } @@ -2928,14 +2981,43 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item, return count; } +static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item, + char *page) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + + return sprintf(page, "%d\n", sport->port_attrib.use_srq); +} + +static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + unsigned long val; + int ret; + + ret = kstrtoul(page, 0, &val); + if (ret < 0) + return ret; + if (val != !!val) + return -EINVAL; + sport->port_attrib.use_srq = val; + + return count; +} + CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rdma_size); CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rsp_size); CONFIGFS_ATTR(srpt_tpg_attrib_, srp_sq_size); +CONFIGFS_ATTR(srpt_tpg_attrib_, use_srq); static 
struct configfs_attribute *srpt_tpg_attrib_attrs[] = { &srpt_tpg_attrib_attr_srp_max_rdma_size, &srpt_tpg_attrib_attr_srp_max_rsp_size, &srpt_tpg_attrib_attr_srp_sq_size, + &srpt_tpg_attrib_attr_use_srq, NULL, }; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 976e924d7400..673387d365a3 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -252,6 +252,7 @@ enum rdma_ch_state { * @free_list: Head of list with free send I/O contexts. * @state: channel state. See also enum rdma_ch_state. * @ioctx_ring: Send ring. + * @ioctx_recv_ring: Receive I/O context ring. * @list: Node for insertion in the srpt_device.rch_list list. * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This * list contains struct srpt_ioctx elements and is protected @@ -281,6 +282,7 @@ struct srpt_rdma_ch { struct list_head free_list; enum rdma_ch_state state; struct srpt_send_ioctx **ioctx_ring; + struct srpt_recv_ioctx **ioctx_recv_ring; struct list_head list; struct list_head cmd_wait_list; struct se_session *sess; @@ -295,11 +297,13 @@ struct srpt_rdma_ch { * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. * @srp_sq_size: Shared receive queue (SRQ) size. + * @use_srq: Whether or not to use SRQ. */ struct srpt_port_attrib { u32 srp_max_rdma_size; u32 srp_max_rsp_size; u32 srp_sq_size; + bool use_srq; }; /** @@ -347,6 +351,7 @@ struct srpt_port { * @srq: Per-HCA SRQ (shared receive queue). * @cm_id: Connection identifier. * @srq_size: SRQ size. + * @use_srq: Whether or not to use SRQ. * @ioctx_ring: Per-HCA SRQ. * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. * @ch_releaseQ: Enables waiting for removal from rch_list. 
@@ -362,6 +367,7 @@ struct srpt_device { struct ib_srq *srq; struct ib_cm_id *cm_id; int srq_size; + bool use_srq; struct srpt_recv_ioctx **ioctx_ring; struct list_head rch_list; wait_queue_head_t ch_releaseQ; -- 2.11.0