OSDN Git Service

Merge branch 'mr_fix' into git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma...
authorJason Gunthorpe <jgg@mellanox.com>
Mon, 28 May 2018 17:44:35 +0000 (11:44 -0600)
committerJason Gunthorpe <jgg@mellanox.com>
Mon, 28 May 2018 17:44:35 +0000 (11:44 -0600)
Update mlx4 to support user MR creation against read-only memory, previously
it required the memory to be writable.

Based on rdma for-rc due to dependencies.

* mr_fix: (2 commits)
  IB/mlx4: Mark user MR as writable if actual virtual memory is writable
  IB/core: Make testing MR flags for writability a static inline function

31 files changed:
MAINTAINERS
drivers/infiniband/core/umem.c
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hns/hns_roce_cq.c
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/hns/hns_roce_qp.c
drivers/infiniband/hw/i40iw/i40iw.h
drivers/infiniband/hw/i40iw/i40iw_cm.c
drivers/infiniband/hw/i40iw/i40iw_hw.c
drivers/infiniband/hw/i40iw/i40iw_main.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/i40iw/i40iw_verbs.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/infiniband/ulp/srpt/Kconfig
drivers/nvme/host/Kconfig
drivers/nvme/target/Kconfig
drivers/staging/lustre/lnet/Kconfig
fs/cifs/Kconfig
include/rdma/ib_umem.h
include/rdma/ib_verbs.h
include/rdma/uverbs_ioctl.h
net/9p/Kconfig
net/rds/Kconfig
net/sunrpc/Kconfig

index 0a1410d..1b52c7e 100644 (file)
@@ -5391,7 +5391,6 @@ S:        Maintained
 F:     drivers/iommu/exynos-iommu.c
 
 EZchip NPS platform support
-M:     Elad Kanfi <eladkan@mellanox.com>
 M:     Vineet Gupta <vgupta@synopsys.com>
 S:     Supported
 F:     arch/arc/plat-eznps
@@ -9012,7 +9011,6 @@ Q:        http://patchwork.ozlabs.org/project/netdev/list/
 F:     drivers/net/ethernet/mellanox/mlx5/core/en_*
 
 MELLANOX ETHERNET INNOVA DRIVER
-M:     Ilan Tayari <ilant@mellanox.com>
 R:     Boris Pismenny <borisp@mellanox.com>
 L:     netdev@vger.kernel.org
 S:     Supported
@@ -9022,7 +9020,6 @@ F:        drivers/net/ethernet/mellanox/mlx5/core/fpga/*
 F:     include/linux/mlx5/mlx5_ifc_fpga.h
 
 MELLANOX ETHERNET INNOVA IPSEC DRIVER
-M:     Ilan Tayari <ilant@mellanox.com>
 R:     Boris Pismenny <borisp@mellanox.com>
 L:     netdev@vger.kernel.org
 S:     Supported
@@ -9078,7 +9075,6 @@ F:        include/uapi/rdma/mlx4-abi.h
 
 MELLANOX MLX5 core VPI driver
 M:     Saeed Mahameed <saeedm@mellanox.com>
-M:     Matan Barak <matanb@mellanox.com>
 M:     Leon Romanovsky <leonro@mellanox.com>
 L:     netdev@vger.kernel.org
 L:     linux-rdma@vger.kernel.org
@@ -9089,7 +9085,6 @@ F:        drivers/net/ethernet/mellanox/mlx5/core/
 F:     include/linux/mlx5/
 
 MELLANOX MLX5 IB driver
-M:     Matan Barak <matanb@mellanox.com>
 M:     Leon Romanovsky <leonro@mellanox.com>
 L:     linux-rdma@vger.kernel.org
 W:     http://www.mellanox.com
@@ -9821,7 +9816,6 @@ F:        net/netfilter/xt_CONNSECMARK.c
 F:     net/netfilter/xt_SECMARK.c
 
 NETWORKING [TLS]
-M:     Ilya Lesokhin <ilyal@mellanox.com>
 M:     Aviad Yehezkel <aviadye@mellanox.com>
 M:     Dave Watson <davejwatson@fb.com>
 L:     netdev@vger.kernel.org
index b3cdc09..54ab633 100644 (file)
@@ -117,20 +117,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
        umem->length     = size;
        umem->address    = addr;
        umem->page_shift = PAGE_SHIFT;
-       umem->pid        = get_task_pid(current, PIDTYPE_PID);
-       /*
-        * We ask for writable memory if any of the following
-        * access flags are set.  "Local write" and "remote write"
-        * obviously require write access.  "Remote atomic" can do
-        * things like fetch and add, which will modify memory, and
-        * "MW bind" can change permissions by binding a window.
-        */
-       umem->writable  = !!(access &
-               (IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
-                IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
+       umem->writable   = ib_access_writable(access);
 
        if (access & IB_ACCESS_ON_DEMAND) {
-               put_pid(umem->pid);
                ret = ib_umem_odp_get(context, umem, access);
                if (ret) {
                        kfree(umem);
@@ -146,7 +135,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list) {
-               put_pid(umem->pid);
                kfree(umem);
                return ERR_PTR(-ENOMEM);
        }
@@ -229,7 +217,6 @@ out:
        if (ret < 0) {
                if (need_release)
                        __ib_umem_release(context->device, umem, 0);
-               put_pid(umem->pid);
                kfree(umem);
        } else
                current->mm->pinned_vm = locked;
@@ -272,8 +259,7 @@ void ib_umem_release(struct ib_umem *umem)
 
        __ib_umem_release(umem->context->device, umem, 1);
 
-       task = get_pid_task(umem->pid, PIDTYPE_PID);
-       put_pid(umem->pid);
+       task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
        if (!task)
                goto out;
        mm = get_task_mm(task);
index e90f2fd..1445918 100644 (file)
@@ -489,10 +489,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
 err_dereg_mem:
        dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
                  mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp);
-err_free_wr_wait:
-       c4iw_put_wr_wait(mhp->wr_waitp);
 err_free_skb:
        kfree_skb(mhp->dereg_skb);
+err_free_wr_wait:
+       c4iw_put_wr_wait(mhp->wr_waitp);
 err_free_mhp:
        kfree(mhp);
        return ERR_PTR(ret);
index 46e9e4f..68580cb 100644 (file)
@@ -5945,6 +5945,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
        u64 status;
        u32 sw_index;
        int i = 0;
+       unsigned long irq_flags;
 
        sw_index = dd->hw_to_sw[hw_context];
        if (sw_index >= dd->num_send_contexts) {
@@ -5954,10 +5955,12 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
                return;
        }
        sci = &dd->send_contexts[sw_index];
+       spin_lock_irqsave(&dd->sc_lock, irq_flags);
        sc = sci->sc;
        if (!sc) {
                dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
                           sw_index, hw_context);
+               spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
                return;
        }
 
@@ -5979,6 +5982,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
         */
        if (sc->type != SC_USER)
                queue_work(dd->pport->hfi1_wq, &sc->halt_work);
+       spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
 
        /*
         * Update the counters for the corresponding status bits.
index 14734d0..3a485f5 100644 (file)
@@ -377,6 +377,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
 
                        hr_cq->set_ci_db = hr_cq->db.db_record;
                        *hr_cq->set_ci_db = 0;
+                       hr_cq->db_en = 1;
                }
 
                /* Init mmt table and write buff address to mtt table */
index 47e1b6a..8013d69 100644 (file)
@@ -722,6 +722,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
        free_mr->mr_free_pd = to_hr_pd(pd);
        free_mr->mr_free_pd->ibpd.device  = &hr_dev->ib_dev;
        free_mr->mr_free_pd->ibpd.uobject = NULL;
+       free_mr->mr_free_pd->ibpd.__internal_mr = NULL;
        atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
 
        attr.qp_access_flags    = IB_ACCESS_REMOTE_WRITE;
@@ -1036,7 +1037,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
 
        do {
                ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
-               if (ret < 0) {
+               if (ret < 0 && hr_qp) {
                        dev_err(dev,
                           "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
                           hr_qp->qpn, ret, hr_mr->key, ne);
index a25c3da..dd716ed 100644 (file)
@@ -142,8 +142,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        unsigned long flags;
        unsigned int ind;
        void *wqe = NULL;
-       u32 tmp_len = 0;
        bool loopback;
+       u32 tmp_len;
        int ret = 0;
        u8 *smac;
        int nreq;
@@ -189,6 +189,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
                owner_bit =
                       ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
+               tmp_len = 0;
 
                /* Corresponding to the QP type, wqe process separately */
                if (ibqp->qp_type == IB_QPT_GSI) {
@@ -547,16 +548,20 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                }
 
                if (i < hr_qp->rq.max_gs) {
-                       dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
-                       dseg[i].addr = 0;
+                       dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+                       dseg->addr = 0;
                }
 
                /* rq support inline data */
-               sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
-               hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge;
-               for (i = 0; i < wr->num_sge; i++) {
-                       sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
-                       sge_list[i].len = wr->sg_list[i].length;
+               if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+                       sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
+                       hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt =
+                                                              (u32)wr->num_sge;
+                       for (i = 0; i < wr->num_sge; i++) {
+                               sge_list[i].addr =
+                                              (void *)(u64)wr->sg_list[i].addr;
+                               sge_list[i].len = wr->sg_list[i].length;
+                       }
                }
 
                hr_qp->rq.wrid[ind] = wr->wr_id;
@@ -613,6 +618,8 @@ static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev,
        dma_unmap_single(hr_dev->dev, ring->desc_dma_addr,
                         ring->desc_num * sizeof(struct hns_roce_cmq_desc),
                         DMA_BIDIRECTIONAL);
+
+       ring->desc_dma_addr = 0;
        kfree(ring->desc);
 }
 
@@ -1081,6 +1088,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
        if (ret) {
                dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n",
                        ret);
+               return ret;
        }
 
        /* Get pf resource owned by every pf */
@@ -1373,6 +1381,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 
        roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S,
                     mr->type == MR_TYPE_MR ? 0 : 1);
+       roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S,
+                    1);
        mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa);
 
        mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
@@ -2171,6 +2181,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
                                    struct hns_roce_v2_qp_context *context,
                                    struct hns_roce_v2_qp_context *qpc_mask)
 {
+       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
 
        /*
@@ -2283,7 +2294,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
        context->rq_db_record_addr = hr_qp->rdb.dma >> 32;
        qpc_mask->rq_db_record_addr = 0;
 
-       roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1);
+       roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S,
+                   (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0);
        roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
 
        roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
@@ -4707,6 +4719,8 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
        {0, }
 };
 
+MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
+
 static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
                                  struct hnae3_handle *handle)
 {
index 1b79a38..c614f91 100644 (file)
@@ -198,7 +198,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
 
        memset(props, 0, sizeof(*props));
 
-       props->sys_image_guid = cpu_to_be32(hr_dev->sys_image_guid);
+       props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
        props->max_mr_size = (u64)(~(0ULL));
        props->page_size_cap = hr_dev->caps.page_size_cap;
        props->vendor_id = hr_dev->vendor_id;
index d4aad34..baaf906 100644 (file)
@@ -660,6 +660,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                                goto err_rq_sge_list;
                        }
                        *hr_qp->rdb.db_record = 0;
+                       hr_qp->rdb_en = 1;
                }
 
                /* Allocate QP buf */
@@ -955,7 +956,14 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        }
 
        if (cur_state == new_state && cur_state == IB_QPS_RESET) {
-               ret = 0;
+               if (hr_dev->caps.min_wqes) {
+                       ret = -EPERM;
+                       dev_err(dev, "cur_state=%d new_state=%d\n", cur_state,
+                               new_state);
+               } else {
+                       ret = 0;
+               }
+
                goto out;
        }
 
index d5d8c1b..2f2b442 100644 (file)
@@ -207,6 +207,7 @@ struct i40iw_msix_vector {
        u32 irq;
        u32 cpu_affinity;
        u32 ceq_id;
+       cpumask_t mask;
 };
 
 struct l2params_work {
index a24daac..7b26551 100644 (file)
@@ -2085,7 +2085,7 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
        if (netif_is_bond_slave(netdev))
                netdev = netdev_master_upper_dev_get(netdev);
 
-       neigh = dst_neigh_lookup(dst, &dst_addr);
+       neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32);
 
        rcu_read_lock();
        if (neigh) {
index 414a36c..2836c54 100644 (file)
@@ -331,7 +331,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
                switch (info->ae_id) {
                case I40IW_AE_LLP_FIN_RECEIVED:
                        if (qp->term_flags)
-                               continue;
+                               break;
                        if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
                                iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT;
                                if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) &&
@@ -360,7 +360,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
                        break;
                case I40IW_AE_LLP_CONNECTION_RESET:
                        if (atomic_read(&iwqp->close_timer_started))
-                               continue;
+                               break;
                        i40iw_cm_disconn(iwqp);
                        break;
                case I40IW_AE_QP_SUSPEND_COMPLETE:
index a220794..68095f0 100644 (file)
@@ -687,7 +687,6 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
                                                         struct i40iw_msix_vector *msix_vec)
 {
        enum i40iw_status_code status;
-       cpumask_t mask;
 
        if (iwdev->msix_shared && !ceq_id) {
                tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
@@ -697,9 +696,9 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
                status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
        }
 
-       cpumask_clear(&mask);
-       cpumask_set_cpu(msix_vec->cpu_affinity, &mask);
-       irq_set_affinity_hint(msix_vec->irq, &mask);
+       cpumask_clear(&msix_vec->mask);
+       cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask);
+       irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask);
 
        if (status) {
                i40iw_pr_err("ceq irq config fail\n");
index 40e4f5a..68679ad 100644 (file)
@@ -394,6 +394,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
 
        list_for_each_entry(iwpbl, pbl_list, list) {
                if (iwpbl->user_base == va) {
+                       iwpbl->on_list = false;
                        list_del(&iwpbl->list);
                        return iwpbl;
                }
@@ -614,6 +615,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
                return ERR_PTR(-ENOMEM);
 
        iwqp = (struct i40iw_qp *)mem;
+       iwqp->allocated_buffer = mem;
        qp = &iwqp->sc_qp;
        qp->back_qp = (void *)iwqp;
        qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
@@ -642,7 +644,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
                goto error;
        }
 
-       iwqp->allocated_buffer = mem;
        iwqp->iwdev = iwdev;
        iwqp->iwpd = iwpd;
        iwqp->ibqp.qp_num = qp_num;
@@ -1898,6 +1899,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
                        goto error;
                spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
                list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+               iwpbl->on_list = true;
                spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
                break;
        case IW_MEMREG_TYPE_CQ:
@@ -1908,6 +1910,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
 
                spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
                list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+               iwpbl->on_list = true;
                spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
                break;
        case IW_MEMREG_TYPE_MEM:
@@ -2045,14 +2048,18 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr,
        switch (iwmr->type) {
        case IW_MEMREG_TYPE_CQ:
                spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
-               if (!list_empty(&ucontext->cq_reg_mem_list))
+               if (iwpbl->on_list) {
+                       iwpbl->on_list = false;
                        list_del(&iwpbl->list);
+               }
                spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
                break;
        case IW_MEMREG_TYPE_QP:
                spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
-               if (!list_empty(&ucontext->qp_reg_mem_list))
+               if (iwpbl->on_list) {
+                       iwpbl->on_list = false;
                        list_del(&iwpbl->list);
+               }
                spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
                break;
        default:
index 9067443..76cf173 100644 (file)
@@ -78,6 +78,7 @@ struct i40iw_pbl {
        };
 
        bool pbl_allocated;
+       bool on_list;
        u64 user_base;
        struct i40iw_pble_alloc pble_alloc;
        struct i40iw_mr *iwmr;
index 61d8b06..ed1f253 100644 (file)
@@ -367,6 +367,40 @@ end:
        return block_shift;
 }
 
+static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
+                                       u64 length, u64 virt_addr,
+                                       int access_flags)
+{
+       /*
+        * Force registering the memory as writable if the underlying pages
+        * are writable.  This is so rereg can change the access permissions
+        * from readable to writable without having to run through ib_umem_get
+        * again
+        */
+       if (!ib_access_writable(access_flags)) {
+               struct vm_area_struct *vma;
+
+               down_read(&current->mm->mmap_sem);
+               /*
+                * FIXME: Ideally this would iterate over all the vmas that
+                * cover the memory, but for now it requires a single vma to
+                * entirely cover the MR to support RO mappings.
+                */
+               vma = find_vma(current->mm, start);
+               if (vma && vma->vm_end >= start + length &&
+                   vma->vm_start <= start) {
+                       if (vma->vm_flags & VM_WRITE)
+                               access_flags |= IB_ACCESS_LOCAL_WRITE;
+               } else {
+                       access_flags |= IB_ACCESS_LOCAL_WRITE;
+               }
+
+               up_read(&current->mm->mmap_sem);
+       }
+
+       return ib_umem_get(context, start, length, access_flags, 0);
+}
+
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata)
@@ -381,10 +415,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
-       /* Force registering the memory as writable. */
-       /* Used for memory re-registeration. HCA protects the access */
-       mr->umem = ib_umem_get(pd->uobject->context, start, length,
-                              access_flags | IB_ACCESS_LOCAL_WRITE, 0);
+       mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length,
+                                   virt_addr, access_flags);
        if (IS_ERR(mr->umem)) {
                err = PTR_ERR(mr->umem);
                goto err_free;
@@ -454,6 +486,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
        }
 
        if (flags & IB_MR_REREG_ACCESS) {
+               if (ib_access_writable(mr_access_flags) && !mmr->umem->writable)
+                       return -EPERM;
+
                err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
                                               convert_access(mr_access_flags));
 
@@ -467,10 +502,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 
                mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
                ib_umem_release(mmr->umem);
-               mmr->umem = ib_umem_get(mr->uobject->context, start, length,
-                                       mr_access_flags |
-                                       IB_ACCESS_LOCAL_WRITE,
-                                       0);
+               mmr->umem =
+                       mlx4_get_umem_mr(mr->uobject->context, start, length,
+                                        virt_addr, mr_access_flags);
                if (IS_ERR(mmr->umem)) {
                        err = PTR_ERR(mmr->umem);
                        /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
index f3e7d7c..25a271e 100644 (file)
@@ -2419,7 +2419,7 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
        MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
 }
 
-static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
                           bool inner)
 {
        if (inner) {
index 0600bc8..a4f1f63 100644 (file)
@@ -487,11 +487,6 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
        return 1;
 }
 
-static int first_med_bfreg(void)
-{
-       return 1;
-}
-
 enum {
        /* this is the first blue flame register in the array of bfregs assigned
         * to a processes. Since we do not use it for blue flame but rather
@@ -517,6 +512,12 @@ static int num_med_bfreg(struct mlx5_ib_dev *dev,
        return n >= 0 ? n : 0;
 }
 
+static int first_med_bfreg(struct mlx5_ib_dev *dev,
+                          struct mlx5_bfreg_info *bfregi)
+{
+       return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM;
+}
+
 static int first_hi_bfreg(struct mlx5_ib_dev *dev,
                          struct mlx5_bfreg_info *bfregi)
 {
@@ -544,10 +545,13 @@ static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
 static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
                                 struct mlx5_bfreg_info *bfregi)
 {
-       int minidx = first_med_bfreg();
+       int minidx = first_med_bfreg(dev, bfregi);
        int i;
 
-       for (i = first_med_bfreg(); i < first_hi_bfreg(dev, bfregi); i++) {
+       if (minidx < 0)
+               return minidx;
+
+       for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) {
                if (bfregi->count[i] < bfregi->count[minidx])
                        minidx = i;
                if (!bfregi->count[minidx])
index 35f3b6f..988aace 100644 (file)
@@ -401,49 +401,47 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
        struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
        struct qedr_dev *dev = get_qedr_dev(context->device);
-       unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
-       u64 unmapped_db = dev->db_phys_addr;
+       unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
        unsigned long len = (vma->vm_end - vma->vm_start);
-       int rc = 0;
-       bool found;
+       unsigned long dpi_start;
+
+       dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
 
        DP_DEBUG(dev, QEDR_MSG_INIT,
-                "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
-                vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
-       if (vma->vm_start & (PAGE_SIZE - 1)) {
-               DP_ERR(dev, "Vma_start not page aligned = %ld\n",
-                      vma->vm_start);
+                "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
+                (void *)vma->vm_start, (void *)vma->vm_end,
+                (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
+
+       if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
+               DP_ERR(dev,
+                      "failed mmap, adrresses must be page aligned: start=0x%pK, end=0x%pK\n",
+                      (void *)vma->vm_start, (void *)vma->vm_end);
                return -EINVAL;
        }
 
-       found = qedr_search_mmap(ucontext, vm_page, len);
-       if (!found) {
-               DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
+       if (!qedr_search_mmap(ucontext, phys_addr, len)) {
+               DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
                       vma->vm_pgoff);
                return -EINVAL;
        }
 
-       DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
-
-       if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
-                                                    dev->db_size))) {
-               DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
-               if (vma->vm_flags & VM_READ) {
-                       DP_ERR(dev, "Trying to map doorbell bar for read\n");
-                       return -EPERM;
-               }
-
-               vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+       if (phys_addr < dpi_start ||
+           ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
+               DP_ERR(dev,
+                      "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
+                      (void *)phys_addr, (void *)dpi_start,
+                      ucontext->dpi_size);
+               return -EINVAL;
+       }
 
-               rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-                                       PAGE_SIZE, vma->vm_page_prot);
-       } else {
-               DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
-               rc = remap_pfn_range(vma, vma->vm_start,
-                                    vma->vm_pgoff, len, vma->vm_page_prot);
+       if (vma->vm_flags & VM_READ) {
+               DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
+               return -EINVAL;
        }
-       DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
-       return rc;
+
+       vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+       return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
+                                 vma->vm_page_prot);
 }
 
 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
index c520614..9deafc3 100644 (file)
@@ -761,7 +761,6 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
        unsigned int mask;
        unsigned int length = 0;
        int i;
-       int must_sched;
 
        while (wr) {
                mask = wr_opcode_mask(wr->opcode, qp);
@@ -791,14 +790,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
                wr = wr->next;
        }
 
-       /*
-        * Must sched in case of GSI QP because ib_send_mad() hold irq lock,
-        * and the requester call ip_local_out_sk() that takes spin_lock_bh.
-        */
-       must_sched = (qp_type(qp) == IB_QPT_GSI) ||
-                       (queue_count(qp->sq.queue) > 1);
-
-       rxe_run_task(&qp->req.task, must_sched);
+       rxe_run_task(&qp->req.task, 1);
        if (unlikely(qp->req.state == QP_STATE_ERROR))
                rxe_run_task(&qp->comp.task, 1);
 
index fb8b718..25bf695 100644 (file)
@@ -1,6 +1,6 @@
 config INFINIBAND_SRPT
        tristate "InfiniBand SCSI RDMA Protocol target support"
-       depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE
+       depends on INFINIBAND_ADDR_TRANS && TARGET_CORE
        ---help---
 
          Support for the SCSI RDMA Protocol (SRP) Target driver. The
index 88a8b59..dbb7464 100644 (file)
@@ -27,7 +27,7 @@ config NVME_FABRICS
 
 config NVME_RDMA
        tristate "NVM Express over Fabrics RDMA host driver"
-       depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
+       depends on INFINIBAND_ADDR_TRANS && BLOCK
        select NVME_CORE
        select NVME_FABRICS
        select SG_POOL
index 3c7b61d..7595664 100644 (file)
@@ -27,7 +27,7 @@ config NVME_TARGET_LOOP
 
 config NVME_TARGET_RDMA
        tristate "NVMe over Fabrics RDMA target support"
-       depends on INFINIBAND && INFINIBAND_ADDR_TRANS
+       depends on INFINIBAND_ADDR_TRANS
        depends on NVME_TARGET
        select SGL_ALLOC
        help
index ad049e6..f3b1ad4 100644 (file)
@@ -34,7 +34,7 @@ config LNET_SELFTEST
 
 config LNET_XPRT_IB
        tristate "LNET infiniband support"
-       depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS
+       depends on LNET && PCI && INFINIBAND_ADDR_TRANS
        default LNET && INFINIBAND
        help
          This option allows the LNET users to use infiniband as an
index 5f132d5..d61e2de 100644 (file)
@@ -197,7 +197,7 @@ config CIFS_SMB311
 
 config CIFS_SMB_DIRECT
        bool "SMB Direct support (Experimental)"
-       depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
+       depends on CIFS=m && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND_ADDR_TRANS=y
        help
          Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1.
          SMB Direct allows transferring SMB packets over RDMA. If unsure,
index 23159dd..a1fd638 100644 (file)
@@ -48,7 +48,6 @@ struct ib_umem {
        int                     writable;
        int                     hugetlb;
        struct work_struct      work;
-       struct pid             *pid;
        struct mm_struct       *mm;
        unsigned long           diff;
        struct ib_umem_odp     *odp_data;
index e849bd0..406c98d 100644 (file)
@@ -3766,6 +3766,20 @@ static inline int ib_check_mr_access(int flags)
        return 0;
 }
 
+static inline bool ib_access_writable(int access_flags)
+{
+       /*
+        * We have writable memory backing the MR if any of the following
+        * access flags are set.  "Local write" and "remote write" obviously
+        * require write access.  "Remote atomic" can do things like fetch and
+        * add, which will modify memory, and "MW bind" can change permissions
+        * by binding a window.
+        */
+       return access_flags &
+               (IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
+                IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND);
+}
+
 /**
  * ib_check_mr_status: lightweight check of MR status.
  *     This routine may provide status checks on a selected
index 4a4201d..095383a 100644 (file)
@@ -411,13 +411,13 @@ static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs
 static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle,
                                        u16 idx)
 {
-       struct ib_uobject *uobj =
-               uverbs_attr_get(attrs_bundle, idx)->obj_attr.uobject;
+       const struct uverbs_attr *attr;
 
-       if (IS_ERR(uobj))
-               return uobj;
+       attr = uverbs_attr_get(attrs_bundle, idx);
+       if (IS_ERR(attr))
+               return ERR_CAST(attr);
 
-       return uobj->object;
+       return attr->obj_attr.uobject->object;
 }
 
 static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
index e6014e0..46c39f7 100644 (file)
@@ -32,7 +32,7 @@ config NET_9P_XEN
 
 
 config NET_9P_RDMA
-       depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
+       depends on INET && INFINIBAND_ADDR_TRANS
        tristate "9P RDMA Transport (Experimental)"
        help
          This builds support for an RDMA transport.
index bffde4b..1a31502 100644 (file)
@@ -8,7 +8,7 @@ config RDS
 
 config RDS_RDMA
        tristate "RDS over Infiniband"
-       depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
+       depends on RDS && INFINIBAND_ADDR_TRANS
        ---help---
          Allow RDS to use Infiniband as a transport.
          This transport supports RDMA operations.
index ac09ca8..6358e52 100644 (file)
@@ -50,7 +50,7 @@ config SUNRPC_DEBUG
 
 config SUNRPC_XPRT_RDMA
        tristate "RPC-over-RDMA transport"
-       depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
+       depends on SUNRPC && INFINIBAND_ADDR_TRANS
        default SUNRPC && INFINIBAND
        select SG_POOL
        help