OSDN Git Service

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 17 Nov 2016 21:53:02 +0000 (13:53 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 17 Nov 2016 21:53:02 +0000 (13:53 -0800)
Pull rmda fixes from Doug Ledford.
 "First round of -rc fixes.

  Due to various issues, I've been away and couldn't send a pull request
  for about three weeks. There were a number of -rc patches that built
  up in the meantime (some where there already from the early -rc
  stages). Obviously, there were way too many to send now, so I tried to
  pare the list down to the more important patches for the -rc cycle.

  Most of the code has had plenty of soak time at the various vendor's
  testing setups, so I doubt there will be another -rc pull request this
  cycle. I also tried to limit the patches to those with smaller
  footprints, so even though a shortlog is longer than I would like, the
  actual diffstat is mostly very small with the exception of just three
  files that had more changes, and a couple files with pure removals.

  Summary:
   - Misc Intel hfi1 fixes
   - Misc Mellanox mlx4, mlx5, and rxe fixes
   - A couple cxgb4 fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (34 commits)
  iw_cxgb4: invalidate the mr when posting a read_w_inv wr
  iw_cxgb4: set *bad_wr for post_send/post_recv errors
  IB/rxe: Update qp state for user query
  IB/rxe: Clear queue buffer when modifying QP to reset
  IB/rxe: Fix handling of erroneous WR
  IB/rxe: Fix kernel panic in UDP tunnel with GRO and RX checksum
  IB/mlx4: Fix create CQ error flow
  IB/mlx4: Check gid_index return value
  IB/mlx5: Fix NULL pointer dereference on debug print
  IB/mlx5: Fix fatal error dispatching
  IB/mlx5: Resolve soft lock on massive reg MRs
  IB/mlx5: Use cache line size to select CQE stride
  IB/mlx5: Validate requested RQT size
  IB/mlx5: Fix memory leak in query device
  IB/core: Avoid unsigned int overflow in sg_alloc_table
  IB/core: Add missing check for addr_resolve callback return value
  IB/core: Set routable RoCE gid type for ipv4/ipv6 networks
  IB/cm: Mark stale CM id's whenever the mad agent was unregistered
  IB/uverbs: Fix leak of XRC target QPs
  IB/hfi1: Remove incorrect IS_ERR check
  ...

37 files changed:
drivers/infiniband/core/addr.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/umem.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/hfi1/affinity.c
drivers/infiniband/hw/hfi1/affinity.h
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip.h
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/hfi1/pio.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/sdma.c
drivers/infiniband/hw/hfi1/sysfs.c
drivers/infiniband/hw/hfi1/trace_rx.h
drivers/infiniband/hw/hfi1/user_sdma.c
drivers/infiniband/hw/mlx4/ah.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/sw/rdmavt/dma.c
drivers/infiniband/sw/rxe/rxe_net.c
drivers/infiniband/sw/rxe/rxe_qp.c
drivers/infiniband/sw/rxe/rxe_queue.c
drivers/infiniband/sw/rxe/rxe_queue.h
drivers/infiniband/sw/rxe/rxe_req.c

index b136d3a..0f58f46 100644 (file)
@@ -699,13 +699,16 @@ EXPORT_SYMBOL(rdma_addr_cancel);
 struct resolve_cb_context {
        struct rdma_dev_addr *addr;
        struct completion comp;
+       int status;
 };
 
 static void resolve_cb(int status, struct sockaddr *src_addr,
             struct rdma_dev_addr *addr, void *context)
 {
-       memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
-                               rdma_dev_addr));
+       if (!status)
+               memcpy(((struct resolve_cb_context *)context)->addr,
+                      addr, sizeof(struct rdma_dev_addr));
+       ((struct resolve_cb_context *)context)->status = status;
        complete(&((struct resolve_cb_context *)context)->comp);
 }
 
@@ -743,6 +746,10 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 
        wait_for_completion(&ctx.comp);
 
+       ret = ctx.status;
+       if (ret)
+               return ret;
+
        memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
        dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
        if (!dev)
index c995255..71c7c4c 100644 (file)
@@ -80,6 +80,8 @@ static struct ib_cm {
        __be32 random_id_operand;
        struct list_head timewait_list;
        struct workqueue_struct *wq;
+       /* Sync on cm change port state */
+       spinlock_t state_lock;
 } cm;
 
 /* Counter indexes ordered by attribute ID */
@@ -161,6 +163,8 @@ struct cm_port {
        struct ib_mad_agent *mad_agent;
        struct kobject port_obj;
        u8 port_num;
+       struct list_head cm_priv_prim_list;
+       struct list_head cm_priv_altr_list;
        struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
 };
 
@@ -241,6 +245,12 @@ struct cm_id_private {
        u8 service_timeout;
        u8 target_ack_delay;
 
+       struct list_head prim_list;
+       struct list_head altr_list;
+       /* Indicates that the send port mad is registered and av is set */
+       int prim_send_port_not_ready;
+       int altr_send_port_not_ready;
+
        struct list_head work_list;
        atomic_t work_count;
 };
@@ -259,20 +269,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
        struct ib_mad_agent *mad_agent;
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;
+       struct cm_av *av;
+       unsigned long flags, flags2;
+       int ret = 0;
 
+       /* don't let the port to be released till the agent is down */
+       spin_lock_irqsave(&cm.state_lock, flags2);
+       spin_lock_irqsave(&cm.lock, flags);
+       if (!cm_id_priv->prim_send_port_not_ready)
+               av = &cm_id_priv->av;
+       else if (!cm_id_priv->altr_send_port_not_ready &&
+                (cm_id_priv->alt_av.port))
+               av = &cm_id_priv->alt_av;
+       else {
+               pr_info("%s: not valid CM id\n", __func__);
+               ret = -ENODEV;
+               spin_unlock_irqrestore(&cm.lock, flags);
+               goto out;
+       }
+       spin_unlock_irqrestore(&cm.lock, flags);
+       /* Make sure the port haven't released the mad yet */
        mad_agent = cm_id_priv->av.port->mad_agent;
-       ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
-       if (IS_ERR(ah))
-               return PTR_ERR(ah);
+       if (!mad_agent) {
+               pr_info("%s: not a valid MAD agent\n", __func__);
+               ret = -ENODEV;
+               goto out;
+       }
+       ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+       if (IS_ERR(ah)) {
+               ret = PTR_ERR(ah);
+               goto out;
+       }
 
        m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
-                              cm_id_priv->av.pkey_index,
+                              av->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                ib_destroy_ah(ah);
-               return PTR_ERR(m);
+               ret = PTR_ERR(m);
+               goto out;
        }
 
        /* Timeout set by caller if response is expected. */
@@ -282,7 +319,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
        atomic_inc(&cm_id_priv->refcount);
        m->context[0] = cm_id_priv;
        *msg = m;
-       return 0;
+
+out:
+       spin_unlock_irqrestore(&cm.state_lock, flags2);
+       return ret;
 }
 
 static int cm_alloc_response_msg(struct cm_port *port,
@@ -352,7 +392,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                           grh, &av->ah_attr);
 }
 
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+                             struct cm_id_private *cm_id_priv)
 {
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
@@ -387,7 +428,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
                             &av->ah_attr);
        av->timeout = path->packet_life_time + 1;
 
-       return 0;
+       spin_lock_irqsave(&cm.lock, flags);
+       if (&cm_id_priv->av == av)
+               list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
+       else if (&cm_id_priv->alt_av == av)
+               list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
+       else
+               ret = -EINVAL;
+
+       spin_unlock_irqrestore(&cm.lock, flags);
+
+       return ret;
 }
 
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
@@ -677,6 +728,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
        spin_lock_init(&cm_id_priv->lock);
        init_completion(&cm_id_priv->comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
+       INIT_LIST_HEAD(&cm_id_priv->prim_list);
+       INIT_LIST_HEAD(&cm_id_priv->altr_list);
        atomic_set(&cm_id_priv->work_count, -1);
        atomic_set(&cm_id_priv->refcount, 1);
        return &cm_id_priv->id;
@@ -892,6 +945,15 @@ retest:
                break;
        }
 
+       spin_lock_irq(&cm.lock);
+       if (!list_empty(&cm_id_priv->altr_list) &&
+           (!cm_id_priv->altr_send_port_not_ready))
+               list_del(&cm_id_priv->altr_list);
+       if (!list_empty(&cm_id_priv->prim_list) &&
+           (!cm_id_priv->prim_send_port_not_ready))
+               list_del(&cm_id_priv->prim_list);
+       spin_unlock_irq(&cm.lock);
+
        cm_free_id(cm_id->local_id);
        cm_deref_id(cm_id_priv);
        wait_for_completion(&cm_id_priv->comp);
@@ -1192,12 +1254,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
                goto out;
        }
 
-       ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
+       ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+                                cm_id_priv);
        if (ret)
                goto error1;
        if (param->alternate_path) {
                ret = cm_init_av_by_path(param->alternate_path,
-                                        &cm_id_priv->alt_av);
+                                        &cm_id_priv->alt_av, cm_id_priv);
                if (ret)
                        goto error1;
        }
@@ -1653,7 +1716,8 @@ static int cm_req_handler(struct cm_work *work)
                        dev_put(gid_attr.ndev);
                }
                work->path[0].gid_type = gid_attr.gid_type;
-               ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+               ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+                                        cm_id_priv);
        }
        if (ret) {
                int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -1672,7 +1736,8 @@ static int cm_req_handler(struct cm_work *work)
                goto rejected;
        }
        if (req_msg->alt_local_lid) {
-               ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
+               ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
+                                        cm_id_priv);
                if (ret) {
                        ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
                                       &work->path[0].sgid,
@@ -2727,7 +2792,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
                goto out;
        }
 
-       ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
+       ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+                                cm_id_priv);
        if (ret)
                goto out;
        cm_id_priv->alt_av.timeout =
@@ -2839,7 +2905,8 @@ static int cm_lap_handler(struct cm_work *work)
        cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
                                work->mad_recv_wc->recv_buf.grh,
                                &cm_id_priv->av);
-       cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
+       cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+                          cm_id_priv);
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -3031,7 +3098,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
                return -EINVAL;
 
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-       ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
+       ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
        if (ret)
                goto out;
 
@@ -3468,7 +3535,9 @@ out:
 static int cm_migrate(struct ib_cm_id *cm_id)
 {
        struct cm_id_private *cm_id_priv;
+       struct cm_av tmp_av;
        unsigned long flags;
+       int tmp_send_port_not_ready;
        int ret = 0;
 
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3477,7 +3546,14 @@ static int cm_migrate(struct ib_cm_id *cm_id)
            (cm_id->lap_state == IB_CM_LAP_UNINIT ||
             cm_id->lap_state == IB_CM_LAP_IDLE)) {
                cm_id->lap_state = IB_CM_LAP_IDLE;
+               /* Swap address vector */
+               tmp_av = cm_id_priv->av;
                cm_id_priv->av = cm_id_priv->alt_av;
+               cm_id_priv->alt_av = tmp_av;
+               /* Swap port send ready state */
+               tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
+               cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
+               cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
        } else
                ret = -EINVAL;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3888,6 +3964,9 @@ static void cm_add_one(struct ib_device *ib_device)
                port->cm_dev = cm_dev;
                port->port_num = i;
 
+               INIT_LIST_HEAD(&port->cm_priv_prim_list);
+               INIT_LIST_HEAD(&port->cm_priv_altr_list);
+
                ret = cm_create_port_fs(port);
                if (ret)
                        goto error1;
@@ -3945,6 +4024,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 {
        struct cm_device *cm_dev = client_data;
        struct cm_port *port;
+       struct cm_id_private *cm_id_priv;
+       struct ib_mad_agent *cur_mad_agent;
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_CM_SUP
        };
@@ -3968,15 +4049,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 
                port = cm_dev->port[i-1];
                ib_modify_port(ib_device, port->port_num, 0, &port_modify);
+               /* Mark all the cm_id's as not valid */
+               spin_lock_irq(&cm.lock);
+               list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
+                       cm_id_priv->altr_send_port_not_ready = 1;
+               list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
+                       cm_id_priv->prim_send_port_not_ready = 1;
+               spin_unlock_irq(&cm.lock);
                /*
                 * We flush the queue here after the going_down set, this
                 * verify that no new works will be queued in the recv handler,
                 * after that we can call the unregister_mad_agent
                 */
                flush_workqueue(cm.wq);
-               ib_unregister_mad_agent(port->mad_agent);
+               spin_lock_irq(&cm.state_lock);
+               cur_mad_agent = port->mad_agent;
+               port->mad_agent = NULL;
+               spin_unlock_irq(&cm.state_lock);
+               ib_unregister_mad_agent(cur_mad_agent);
                cm_remove_port_fs(port);
        }
+
        device_unregister(cm_dev->device);
        kfree(cm_dev);
 }
@@ -3989,6 +4082,7 @@ static int __init ib_cm_init(void)
        INIT_LIST_HEAD(&cm.device_list);
        rwlock_init(&cm.device_lock);
        spin_lock_init(&cm.lock);
+       spin_lock_init(&cm.state_lock);
        cm.listen_service_table = RB_ROOT;
        cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
        cm.remote_id_table = RB_ROOT;
index 89a6b05..2a6fc47 100644 (file)
@@ -2438,6 +2438,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
        return 0;
 }
 
+static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
+                                          unsigned long supported_gids,
+                                          enum ib_gid_type default_gid)
+{
+       if ((network_type == RDMA_NETWORK_IPV4 ||
+            network_type == RDMA_NETWORK_IPV6) &&
+           test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
+               return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+       return default_gid;
+}
+
 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 {
        struct rdma_route *route = &id_priv->id.route;
@@ -2463,6 +2475,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
        route->num_paths = 1;
 
        if (addr->dev_addr.bound_dev_if) {
+               unsigned long supported_gids;
+
                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
                if (!ndev) {
                        ret = -ENODEV;
@@ -2486,7 +2500,12 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
                route->path_rec->net = &init_net;
                route->path_rec->ifindex = ndev->ifindex;
-               route->path_rec->gid_type = id_priv->gid_type;
+               supported_gids = roce_gid_type_mask_support(id_priv->id.device,
+                                                           id_priv->id.port_num);
+               route->path_rec->gid_type =
+                       cma_route_gid_type(addr->dev_addr.network,
+                                          supported_gids,
+                                          id_priv->gid_type);
        }
        if (!ndev) {
                ret = -ENODEV;
index 224ad27..84b4eff 100644 (file)
@@ -175,7 +175,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
        cur_base = addr & PAGE_MASK;
 
-       if (npages == 0) {
+       if (npages == 0 || npages > UINT_MAX) {
                ret = -EINVAL;
                goto out;
        }
index 0012fa5..44b1104 100644 (file)
@@ -262,12 +262,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                        container_of(uobj, struct ib_uqp_object, uevent.uobject);
 
                idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-               if (qp != qp->real_qp) {
-                       ib_close_qp(qp);
-               } else {
+               if (qp == qp->real_qp)
                        ib_uverbs_detach_umcast(qp, uqp);
-                       ib_destroy_qp(qp);
-               }
+               ib_destroy_qp(qp);
                ib_uverbs_release_uevent(file, &uqp->uevent);
                kfree(uqp);
        }
index 867b8cf..19c6477 100644 (file)
@@ -666,18 +666,6 @@ skip_cqe:
        return ret;
 }
 
-static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
-{
-       struct c4iw_mr *mhp;
-       unsigned long flags;
-
-       spin_lock_irqsave(&rhp->lock, flags);
-       mhp = get_mhp(rhp, rkey >> 8);
-       if (mhp)
-               mhp->attr.state = 0;
-       spin_unlock_irqrestore(&rhp->lock, flags);
-}
-
 /*
  * Get one cq entry from c4iw and map it to openib.
  *
@@ -733,7 +721,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
                    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
                        wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
                        wc->wc_flags |= IB_WC_WITH_INVALIDATE;
-                       invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
+                       c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
                }
        } else {
                switch (CQE_OPCODE(&cqe)) {
@@ -762,7 +750,8 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 
                        /* Invalidate the MR if the fastreg failed */
                        if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
-                               invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe));
+                               c4iw_invalidate_mr(qhp->rhp,
+                                                  CQE_WRID_FR_STAG(&cqe));
                        break;
                default:
                        printk(KERN_ERR MOD "Unexpected opcode %d "
index 7e7f79e..4788e1a 100644 (file)
@@ -999,6 +999,6 @@ extern int db_coalescing_threshold;
 extern int use_dsgl;
 void c4iw_drain_rq(struct ib_qp *qp);
 void c4iw_drain_sq(struct ib_qp *qp);
-
+void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
 
 #endif
index 80e2774..410408f 100644 (file)
@@ -770,3 +770,15 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
        kfree(mhp);
        return 0;
 }
+
+void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
+{
+       struct c4iw_mr *mhp;
+       unsigned long flags;
+
+       spin_lock_irqsave(&rhp->lock, flags);
+       mhp = get_mhp(rhp, rkey >> 8);
+       if (mhp)
+               mhp->attr.state = 0;
+       spin_unlock_irqrestore(&rhp->lock, flags);
+}
index f57deba..b7ac97b 100644 (file)
@@ -706,12 +706,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
        return 0;
 }
 
-static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe,
-                         struct ib_send_wr *wr, u8 *len16)
+static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 {
-       struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8);
-
-       mhp->attr.state = 0;
        wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
        wqe->inv.r2 = 0;
        *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
@@ -797,11 +793,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        spin_lock_irqsave(&qhp->lock, flag);
        if (t4_wq_in_error(&qhp->wq)) {
                spin_unlock_irqrestore(&qhp->lock, flag);
+               *bad_wr = wr;
                return -EINVAL;
        }
        num_wrs = t4_sq_avail(&qhp->wq);
        if (num_wrs == 0) {
                spin_unlock_irqrestore(&qhp->lock, flag);
+               *bad_wr = wr;
                return -ENOMEM;
        }
        while (wr) {
@@ -840,10 +838,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                case IB_WR_RDMA_READ_WITH_INV:
                        fw_opcode = FW_RI_RDMA_READ_WR;
                        swsqe->opcode = FW_RI_READ_REQ;
-                       if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
+                       if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
+                               c4iw_invalidate_mr(qhp->rhp,
+                                                  wr->sg_list[0].lkey);
                                fw_flags = FW_RI_RDMA_READ_INVALIDATE;
-                       else
+                       } else {
                                fw_flags = 0;
+                       }
                        err = build_rdma_read(wqe, wr, &len16);
                        if (err)
                                break;
@@ -876,7 +877,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
                        fw_opcode = FW_RI_INV_LSTAG_WR;
                        swsqe->opcode = FW_RI_LOCAL_INV;
-                       err = build_inv_stag(qhp->rhp, wqe, wr, &len16);
+                       err = build_inv_stag(wqe, wr, &len16);
+                       c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
                        break;
                default:
                        PDBG("%s post of type=%d TBD!\n", __func__,
@@ -934,11 +936,13 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
        spin_lock_irqsave(&qhp->lock, flag);
        if (t4_wq_in_error(&qhp->wq)) {
                spin_unlock_irqrestore(&qhp->lock, flag);
+               *bad_wr = wr;
                return -EINVAL;
        }
        num_wrs = t4_rq_avail(&qhp->wq);
        if (num_wrs == 0) {
                spin_unlock_irqrestore(&qhp->lock, flag);
+               *bad_wr = wr;
                return -ENOMEM;
        }
        while (wr) {
index a26a9a0..67ea85a 100644 (file)
@@ -775,75 +775,3 @@ void hfi1_put_proc_affinity(int cpu)
        }
        mutex_unlock(&affinity->lock);
 }
-
-int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
-                          size_t count)
-{
-       struct hfi1_affinity_node *entry;
-       cpumask_var_t mask;
-       int ret, i;
-
-       mutex_lock(&node_affinity.lock);
-       entry = node_affinity_lookup(dd->node);
-
-       if (!entry) {
-               ret = -EINVAL;
-               goto unlock;
-       }
-
-       ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
-       if (!ret) {
-               ret = -ENOMEM;
-               goto unlock;
-       }
-
-       ret = cpulist_parse(buf, mask);
-       if (ret)
-               goto out;
-
-       if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
-               dd_dev_warn(dd, "Invalid CPU mask\n");
-               ret = -EINVAL;
-               goto out;
-       }
-
-       /* reset the SDMA interrupt affinity details */
-       init_cpu_mask_set(&entry->def_intr);
-       cpumask_copy(&entry->def_intr.mask, mask);
-
-       /* Reassign the affinity for each SDMA interrupt. */
-       for (i = 0; i < dd->num_msix_entries; i++) {
-               struct hfi1_msix_entry *msix;
-
-               msix = &dd->msix_entries[i];
-               if (msix->type != IRQ_SDMA)
-                       continue;
-
-               ret = get_irq_affinity(dd, msix);
-
-               if (ret)
-                       break;
-       }
-out:
-       free_cpumask_var(mask);
-unlock:
-       mutex_unlock(&node_affinity.lock);
-       return ret ? ret : strnlen(buf, PAGE_SIZE);
-}
-
-int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
-{
-       struct hfi1_affinity_node *entry;
-
-       mutex_lock(&node_affinity.lock);
-       entry = node_affinity_lookup(dd->node);
-
-       if (!entry) {
-               mutex_unlock(&node_affinity.lock);
-               return -EINVAL;
-       }
-
-       cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
-       mutex_unlock(&node_affinity.lock);
-       return strnlen(buf, PAGE_SIZE);
-}
index b89ea3c..42e6331 100644 (file)
@@ -102,10 +102,6 @@ int hfi1_get_proc_affinity(int);
 /* Release a CPU used by a user process. */
 void hfi1_put_proc_affinity(int);
 
-int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf);
-int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
-                          size_t count);
-
 struct hfi1_affinity_node {
        int node;
        struct cpu_mask_set def_intr;
index 9bf5f23..24d0820 100644 (file)
@@ -6301,19 +6301,8 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
        /* leave shared count at zero for both global and VL15 */
        write_global_credit(dd, vau, vl15buf, 0);
 
-       /* We may need some credits for another VL when sending packets
-        * with the snoop interface. Dividing it down the middle for VL15
-        * and VL0 should suffice.
-        */
-       if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
-               write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
-                   << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
-               write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
-                   << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
-       } else {
-               write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
-                       << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
-       }
+       write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
+                 << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
 }
 
 /*
@@ -9915,9 +9904,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
        u32 mask = ~((1U << ppd->lmc) - 1);
        u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
 
-       if (dd->hfi1_snoop.mode_flag)
-               dd_dev_info(dd, "Set lid/lmc while snooping");
-
        c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
                | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
        c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
@@ -12112,7 +12098,7 @@ static void update_synth_timer(unsigned long opaque)
        mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
 }
 
-#define C_MAX_NAME 13 /* 12 chars + one for /0 */
+#define C_MAX_NAME 16 /* 15 chars + one for /0 */
 static int init_cntrs(struct hfi1_devdata *dd)
 {
        int i, rcv_ctxts, j;
@@ -14463,7 +14449,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
         * Any error printing is already done by the init code.
         * On return, we have the chip mapped.
         */
-       ret = hfi1_pcie_ddinit(dd, pdev, ent);
+       ret = hfi1_pcie_ddinit(dd, pdev);
        if (ret < 0)
                goto bail_free;
 
@@ -14691,6 +14677,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
        if (ret)
                goto bail_free_cntrs;
 
+       init_completion(&dd->user_comp);
+
+       /* The user refcount starts with one to inidicate an active device */
+       atomic_set(&dd->user_refcount, 1);
+
        goto bail;
 
 bail_free_rcverr:
index 9234525..043fd21 100644 (file)
 /* DC_DC8051_CFG_MODE.GENERAL bits */
 #define DISABLE_SELF_GUID_CHECK 0x2
 
+/* Bad L2 frame error code */
+#define BAD_L2_ERR      0x6
+
 /*
  * Eager buffer minimum and maximum sizes supported by the hardware.
  * All power-of-two sizes in between are supported as well.
index 6563e4d..c5efff2 100644 (file)
@@ -599,7 +599,6 @@ static void __prescan_rxq(struct hfi1_packet *packet)
                                         dd->rhf_offset;
                struct rvt_qp *qp;
                struct ib_header *hdr;
-               struct ib_other_headers *ohdr;
                struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
                u64 rhf = rhf_to_cpu(rhf_addr);
                u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -615,18 +614,21 @@ static void __prescan_rxq(struct hfi1_packet *packet)
                if (etype != RHF_RCV_TYPE_IB)
                        goto next;
 
-               hdr = hfi1_get_msgheader(dd, rhf_addr);
+               packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
+               hdr = packet->hdr;
 
                lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 
-               if (lnh == HFI1_LRH_BTH)
-                       ohdr = &hdr->u.oth;
-               else if (lnh == HFI1_LRH_GRH)
-                       ohdr = &hdr->u.l.oth;
-               else
+               if (lnh == HFI1_LRH_BTH) {
+                       packet->ohdr = &hdr->u.oth;
+               } else if (lnh == HFI1_LRH_GRH) {
+                       packet->ohdr = &hdr->u.l.oth;
+                       packet->rcv_flags |= HFI1_HAS_GRH;
+               } else {
                        goto next; /* just in case */
+               }
 
-               bth1 = be32_to_cpu(ohdr->bth[1]);
+               bth1 = be32_to_cpu(packet->ohdr->bth[1]);
                is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
 
                if (!is_ecn)
@@ -646,7 +648,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 
                /* turn off BECN, FECN */
                bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
-               ohdr->bth[1] = cpu_to_be32(bth1);
+               packet->ohdr->bth[1] = cpu_to_be32(bth1);
 next:
                update_ps_mdata(&mdata, rcd);
        }
@@ -1360,12 +1362,25 @@ int process_receive_ib(struct hfi1_packet *packet)
 
 int process_receive_bypass(struct hfi1_packet *packet)
 {
+       struct hfi1_devdata *dd = packet->rcd->dd;
+
        if (unlikely(rhf_err_flags(packet->rhf)))
                handle_eflags(packet);
 
-       dd_dev_err(packet->rcd->dd,
+       dd_dev_err(dd,
                   "Bypass packets are not supported in normal operation. Dropping\n");
-       incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors);
+       incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
+       if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
+               u64 *flits = packet->ebuf;
+
+               if (flits && !(packet->rhf & RHF_LEN_ERR)) {
+                       dd->err_info_rcvport.packet_flit1 = flits[0];
+                       dd->err_info_rcvport.packet_flit2 =
+                               packet->tlen > sizeof(flits[0]) ? flits[1] : 0;
+               }
+               dd->err_info_rcvport.status_and_code |=
+                       (OPA_EI_STATUS_SMASK | BAD_L2_ERR);
+       }
        return RHF_RCV_CONTINUE;
 }
 
index 677efa0..bd786b7 100644 (file)
@@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
                                               struct hfi1_devdata,
                                               user_cdev);
 
+       if (!atomic_inc_not_zero(&dd->user_refcount))
+               return -ENXIO;
+
        /* Just take a ref now. Not all opens result in a context assign */
        kobject_get(&dd->kobj);
 
@@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
                fd->rec_cpu_num = -1; /* no cpu affinity by default */
                fd->mm = current->mm;
                atomic_inc(&fd->mm->mm_count);
-       }
+               fp->private_data = fd;
+       } else {
+               fp->private_data = NULL;
+
+               if (atomic_dec_and_test(&dd->user_refcount))
+                       complete(&dd->user_comp);
 
-       fp->private_data = fd;
+               return -ENOMEM;
+       }
 
-       return fd ? 0 : -ENOMEM;
+       return 0;
 }
 
 static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 done:
        mmdrop(fdata->mm);
        kobject_put(&dd->kobj);
+
+       if (atomic_dec_and_test(&dd->user_refcount))
+               complete(&dd->user_comp);
+
        kfree(fdata);
        return 0;
 }
index 7eef11b..cc87fd4 100644 (file)
@@ -367,26 +367,6 @@ struct hfi1_packet {
        u8 etype;
 };
 
-/*
- * Private data for snoop/capture support.
- */
-struct hfi1_snoop_data {
-       int mode_flag;
-       struct cdev cdev;
-       struct device *class_dev;
-       /* protect snoop data */
-       spinlock_t snoop_lock;
-       struct list_head queue;
-       wait_queue_head_t waitq;
-       void *filter_value;
-       int (*filter_callback)(void *hdr, void *data, void *value);
-       u64 dcc_cfg; /* saved value of DCC Cfg register */
-};
-
-/* snoop mode_flag values */
-#define HFI1_PORT_SNOOP_MODE     1U
-#define HFI1_PORT_CAPTURE_MODE   2U
-
 struct rvt_sge_state;
 
 /*
@@ -613,8 +593,6 @@ struct hfi1_pportdata {
        struct mutex hls_lock;
        u32 host_link_state;
 
-       spinlock_t            sdma_alllock ____cacheline_aligned_in_smp;
-
        u32 lstate;     /* logical link state */
 
        /* these are the "32 bit" regs */
@@ -1104,8 +1082,6 @@ struct hfi1_devdata {
        char *portcntrnames;
        size_t portcntrnameslen;
 
-       struct hfi1_snoop_data hfi1_snoop;
-
        struct err_info_rcvport err_info_rcvport;
        struct err_info_constraint err_info_rcv_constraint;
        struct err_info_constraint err_info_xmit_constraint;
@@ -1141,8 +1117,8 @@ struct hfi1_devdata {
        rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
 
        /*
-        * Handlers for outgoing data so that snoop/capture does not
-        * have to have its hooks in the send path
+        * Capability to have different send engines simply by changing a
+        * pointer value.
         */
        send_routine process_pio_send;
        send_routine process_dma_send;
@@ -1174,6 +1150,10 @@ struct hfi1_devdata {
        spinlock_t aspm_lock;
        /* Number of verbs contexts which have disabled ASPM */
        atomic_t aspm_disabled_cnt;
+       /* Keeps track of user space clients */
+       atomic_t user_refcount;
+       /* Used to wait for outstanding user space clients before dev removal */
+       struct completion user_comp;
 
        struct hfi1_affinity *affinity;
        struct rhashtable sdma_rht;
@@ -1221,8 +1201,6 @@ struct hfi1_devdata *hfi1_lookup(int unit);
 extern u32 hfi1_cpulist_count;
 extern unsigned long *hfi1_cpulist;
 
-extern unsigned int snoop_drop_send;
-extern unsigned int snoop_force_capture;
 int hfi1_init(struct hfi1_devdata *, int);
 int hfi1_count_units(int *npresentp, int *nupp);
 int hfi1_count_active_units(void);
@@ -1557,13 +1535,6 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
 void reset_link_credits(struct hfi1_devdata *dd);
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
 
-int snoop_recv_handler(struct hfi1_packet *packet);
-int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
-                          u64 pbc);
-int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
-                          u64 pbc);
-void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
-                          u64 pbc, const void *from, size_t count);
 int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);
 
 static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
@@ -1763,8 +1734,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
 
 int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *);
 void hfi1_pcie_cleanup(struct pci_dev *);
-int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *,
-                    const struct pci_device_id *);
+int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *);
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
 void hfi1_pcie_flr(struct hfi1_devdata *);
 int pcie_speeds(struct hfi1_devdata *);
@@ -1799,8 +1769,6 @@ int kdeth_process_expected(struct hfi1_packet *packet);
 int kdeth_process_eager(struct hfi1_packet *packet);
 int process_receive_invalid(struct hfi1_packet *packet);
 
-extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8];
-
 void update_sge(struct rvt_sge_state *ss, u32 length);
 
 /* global module parameter variables */
@@ -1827,9 +1795,6 @@ extern struct mutex hfi1_mutex;
 #define DRIVER_NAME            "hfi1"
 #define HFI1_USER_MINOR_BASE     0
 #define HFI1_TRACE_MINOR         127
-#define HFI1_DIAGPKT_MINOR       128
-#define HFI1_DIAG_MINOR_BASE     129
-#define HFI1_SNOOP_CAPTURE_BASE  200
 #define HFI1_NMINORS             255
 
 #define PCI_VENDOR_ID_INTEL 0x8086
@@ -1848,7 +1813,13 @@ extern struct mutex hfi1_mutex;
 static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
                                                  u16 ctxt_type)
 {
-       u64 base_sc_integrity =
+       u64 base_sc_integrity;
+
+       /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
+       if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
+               return 0;
+
+       base_sc_integrity =
        SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
        | SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
        | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
@@ -1863,7 +1834,6 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
        | SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
        | SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK
        | SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK
-       | SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK
        | SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK
        | SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK;
 
@@ -1872,18 +1842,23 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
        else
                base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
 
-       if (is_ax(dd))
-               /* turn off send-side job key checks - A0 */
-               return base_sc_integrity &
-                      ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+       /* turn on send-side job key checks if !A0 */
+       if (!is_ax(dd))
+               base_sc_integrity |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+
        return base_sc_integrity;
 }
 
 static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
 {
-       u64 base_sdma_integrity =
+       u64 base_sdma_integrity;
+
+       /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
+       if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
+               return 0;
+
+       base_sdma_integrity =
        SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
-       | SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
        | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
        | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
        | SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
@@ -1895,14 +1870,18 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
        | SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
        | SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK
        | SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK
-       | SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK
        | SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK
        | SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK;
 
-       if (is_ax(dd))
-               /* turn off send-side job key checks - A0 */
-               return base_sdma_integrity &
-                      ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+       if (!HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
+               base_sdma_integrity |=
+               SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK;
+
+       /* turn on send-side job key checks if !A0 */
+       if (!is_ax(dd))
+               base_sdma_integrity |=
+                       SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+
        return base_sdma_integrity;
 }
 
index 60db615..e3b5bc9 100644 (file)
@@ -144,6 +144,8 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
                struct hfi1_ctxtdata *rcd;
 
                ppd = dd->pport + (i % dd->num_pports);
+
+               /* dd->rcd[i] gets assigned inside the callee */
                rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
                if (!rcd) {
                        dd_dev_err(dd,
@@ -169,8 +171,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
                if (!rcd->sc) {
                        dd_dev_err(dd,
                                   "Unable to allocate kernel send context, failing\n");
-                       dd->rcd[rcd->ctxt] = NULL;
-                       hfi1_free_ctxtdata(dd, rcd);
                        goto nomem;
                }
 
@@ -178,9 +178,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
                if (ret < 0) {
                        dd_dev_err(dd,
                                   "Failed to setup kernel receive context, failing\n");
-                       sc_free(rcd->sc);
-                       dd->rcd[rcd->ctxt] = NULL;
-                       hfi1_free_ctxtdata(dd, rcd);
                        ret = -EFAULT;
                        goto bail;
                }
@@ -196,6 +193,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 nomem:
        ret = -ENOMEM;
 bail:
+       if (dd->rcd) {
+               for (i = 0; i < dd->num_rcv_contexts; ++i)
+                       hfi1_free_ctxtdata(dd, dd->rcd[i]);
+       }
        kfree(dd->rcd);
        dd->rcd = NULL;
        return ret;
@@ -216,7 +217,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
            dd->num_rcv_contexts - dd->first_user_ctxt)
                kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
                                 (dd->num_rcv_contexts - dd->first_user_ctxt));
-       rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
+       rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
        if (rcd) {
                u32 rcvtids, max_entries;
 
@@ -261,13 +262,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
                }
                rcd->eager_base = base * dd->rcv_entries.group_size;
 
-               /* Validate and initialize Rcv Hdr Q variables */
-               if (rcvhdrcnt % HDRQ_INCREMENT) {
-                       dd_dev_err(dd,
-                                  "ctxt%u: header queue count %d must be divisible by %lu\n",
-                                  rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT);
-                       goto bail;
-               }
                rcd->rcvhdrq_cnt = rcvhdrcnt;
                rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
                /*
@@ -506,7 +500,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
        INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
 
        mutex_init(&ppd->hls_lock);
-       spin_lock_init(&ppd->sdma_alllock);
        spin_lock_init(&ppd->qsfp_info.qsfp_lock);
 
        ppd->qsfp_info.ppd = ppd;
@@ -1399,28 +1392,43 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
        hfi1_free_devdata(dd);
 }
 
+static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
+{
+       if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+               hfi1_early_err(dev, "Receive header queue count too small\n");
+               return -EINVAL;
+       }
+
+       if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+               hfi1_early_err(dev,
+                              "Receive header queue count cannot be greater than %u\n",
+                              HFI1_MAX_HDRQ_EGRBUF_CNT);
+               return -EINVAL;
+       }
+
+       if (thecnt % HDRQ_INCREMENT) {
+               hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
+                              thecnt, HDRQ_INCREMENT);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        int ret = 0, j, pidx, initfail;
-       struct hfi1_devdata *dd = ERR_PTR(-EINVAL);
+       struct hfi1_devdata *dd;
        struct hfi1_pportdata *ppd;
 
        /* First, lock the non-writable module parameters */
        HFI1_CAP_LOCK();
 
        /* Validate some global module parameters */
-       if (rcvhdrcnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
-               hfi1_early_err(&pdev->dev, "Header queue  count too small\n");
-               ret = -EINVAL;
-               goto bail;
-       }
-       if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
-               hfi1_early_err(&pdev->dev,
-                              "Receive header queue count cannot be greater than %u\n",
-                              HFI1_MAX_HDRQ_EGRBUF_CNT);
-               ret = -EINVAL;
+       ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+       if (ret)
                goto bail;
-       }
+
        /* use the encoding function as a sanitization check */
        if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
                hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
@@ -1461,26 +1469,25 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (ret)
                goto bail;
 
-       /*
-        * Do device-specific initialization, function table setup, dd
-        * allocation, etc.
-        */
-       switch (ent->device) {
-       case PCI_DEVICE_ID_INTEL0:
-       case PCI_DEVICE_ID_INTEL1:
-               dd = hfi1_init_dd(pdev, ent);
-               break;
-       default:
+       if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+             ent->device == PCI_DEVICE_ID_INTEL1)) {
                hfi1_early_err(&pdev->dev,
                               "Failing on unknown Intel deviceid 0x%x\n",
                               ent->device);
                ret = -ENODEV;
+               goto clean_bail;
        }
 
-       if (IS_ERR(dd))
+       /*
+        * Do device-specific initialization, function table setup, dd
+        * allocation, etc.
+        */
+       dd = hfi1_init_dd(pdev, ent);
+
+       if (IS_ERR(dd)) {
                ret = PTR_ERR(dd);
-       if (ret)
                goto clean_bail; /* error already printed */
+       }
 
        ret = create_workqueues(dd);
        if (ret)
@@ -1538,12 +1545,31 @@ bail:
        return ret;
 }
 
+static void wait_for_clients(struct hfi1_devdata *dd)
+{
+       /*
+        * Remove the device init value and complete the device if there is
+        * no clients or wait for active clients to finish.
+        */
+       if (atomic_dec_and_test(&dd->user_refcount))
+               complete(&dd->user_comp);
+
+       wait_for_completion(&dd->user_comp);
+}
+
 static void remove_one(struct pci_dev *pdev)
 {
        struct hfi1_devdata *dd = pci_get_drvdata(pdev);
 
        /* close debugfs files before ib unregister */
        hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+
+       /* remove the /dev hfi1 interface */
+       hfi1_device_remove(dd);
+
+       /* wait for existing user space clients to finish */
+       wait_for_clients(dd);
+
        /* unregister from IB core */
        hfi1_unregister_ib_device(dd);
 
@@ -1558,8 +1584,6 @@ static void remove_one(struct pci_dev *pdev)
        /* wait until all of our (qsfp) queue_work() calls complete */
        flush_workqueue(ib_wq);
 
-       hfi1_device_remove(dd);
-
        postinit_cleanup(dd);
 }
 
index 89c68da..4ac8f33 100644 (file)
@@ -157,8 +157,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev)
  * fields required to re-initialize after a chip reset, or for
  * various other purposes
  */
-int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
-                    const struct pci_device_id *ent)
+int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
 {
        unsigned long len;
        resource_size_t addr;
index 50a3a36..d89b874 100644 (file)
@@ -668,19 +668,12 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
 void set_pio_integrity(struct send_context *sc)
 {
        struct hfi1_devdata *dd = sc->dd;
-       u64 reg = 0;
        u32 hw_context = sc->hw_context;
        int type = sc->type;
 
-       /*
-        * No integrity checks if HFI1_CAP_NO_INTEGRITY is set, or if
-        * we're snooping.
-        */
-       if (likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) &&
-           dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE)
-               reg = hfi1_pkt_default_send_ctxt_mask(dd, type);
-
-       write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), reg);
+       write_kctxt_csr(dd, hw_context,
+                       SC(CHECK_ENABLE),
+                       hfi1_pkt_default_send_ctxt_mask(dd, type));
 }
 
 static u32 get_buffers_allocated(struct send_context *sc)
index 8bc5013..83198a8 100644 (file)
@@ -89,7 +89,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
 
        lockdep_assert_held(&qp->s_lock);
        qp->s_flags |= RVT_S_WAIT_RNR;
-       qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
+       priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to);
        add_timer(&priv->s_rnr_timer);
 }
 
index fd39bca..9cbe52d 100644 (file)
@@ -2009,11 +2009,6 @@ static void sdma_hw_start_up(struct sdma_engine *sde)
        write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
 }
 
-#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
-(r &= ~SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
-
-#define SET_STATIC_RATE_CONTROL_SMASK(r) \
-(r |= SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
 /*
  * set_sdma_integrity
  *
@@ -2022,19 +2017,9 @@ static void sdma_hw_start_up(struct sdma_engine *sde)
 static void set_sdma_integrity(struct sdma_engine *sde)
 {
        struct hfi1_devdata *dd = sde->dd;
-       u64 reg;
-
-       if (unlikely(HFI1_CAP_IS_KSET(NO_INTEGRITY)))
-               return;
-
-       reg = hfi1_pkt_base_sdma_integrity(dd);
-
-       if (HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
-               CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
-       else
-               SET_STATIC_RATE_CONTROL_SMASK(reg);
 
-       write_sde_csr(sde, SD(CHECK_ENABLE), reg);
+       write_sde_csr(sde, SD(CHECK_ENABLE),
+                     hfi1_pkt_base_sdma_integrity(dd));
 }
 
 static void init_sdma_regs(
index edba224..919a547 100644 (file)
@@ -49,7 +49,6 @@
 #include "hfi.h"
 #include "mad.h"
 #include "trace.h"
-#include "affinity.h"
 
 /*
  * Start of per-port congestion control structures and support code
@@ -623,27 +622,6 @@ static ssize_t show_tempsense(struct device *device,
        return ret;
 }
 
-static ssize_t show_sdma_affinity(struct device *device,
-                                 struct device_attribute *attr, char *buf)
-{
-       struct hfi1_ibdev *dev =
-               container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
-       struct hfi1_devdata *dd = dd_from_dev(dev);
-
-       return hfi1_get_sdma_affinity(dd, buf);
-}
-
-static ssize_t store_sdma_affinity(struct device *device,
-                                  struct device_attribute *attr,
-                                  const char *buf, size_t count)
-{
-       struct hfi1_ibdev *dev =
-               container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
-       struct hfi1_devdata *dd = dd_from_dev(dev);
-
-       return hfi1_set_sdma_affinity(dd, buf, count);
-}
-
 /*
  * end of per-unit (or driver, in some cases, but replicated
  * per unit) functions
@@ -658,8 +636,6 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
 static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
 static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity,
-                  store_sdma_affinity);
 
 static struct device_attribute *hfi1_attributes[] = {
        &dev_attr_hw_rev,
@@ -670,7 +646,6 @@ static struct device_attribute *hfi1_attributes[] = {
        &dev_attr_boardversion,
        &dev_attr_tempsense,
        &dev_attr_chip_reset,
-       &dev_attr_sdma_affinity,
 };
 
 int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
index 11e02b2..f77e59f 100644 (file)
@@ -253,66 +253,6 @@ TRACE_EVENT(hfi1_mmu_invalidate,
                      )
            );
 
-#define SNOOP_PRN \
-       "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \
-       "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]"
-
-TRACE_EVENT(snoop_capture,
-           TP_PROTO(struct hfi1_devdata *dd,
-                    int hdr_len,
-                    struct ib_header *hdr,
-                    int data_len,
-                    void *data),
-           TP_ARGS(dd, hdr_len, hdr, data_len, data),
-           TP_STRUCT__entry(
-                            DD_DEV_ENTRY(dd)
-                            __field(u16, slid)
-                            __field(u16, dlid)
-                            __field(u32, qpn)
-                            __field(u8, opcode)
-                            __field(u8, sl)
-                            __field(u16, pkey)
-                            __field(u32, hdr_len)
-                            __field(u32, data_len)
-                            __field(u8, lnh)
-                            __dynamic_array(u8, raw_hdr, hdr_len)
-                            __dynamic_array(u8, raw_pkt, data_len)
-                            ),
-           TP_fast_assign(
-               struct ib_other_headers *ohdr;
-
-               __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
-               if (__entry->lnh == HFI1_LRH_BTH)
-               ohdr = &hdr->u.oth;
-               else
-               ohdr = &hdr->u.l.oth;
-               DD_DEV_ASSIGN(dd);
-               __entry->slid = be16_to_cpu(hdr->lrh[3]);
-               __entry->dlid = be16_to_cpu(hdr->lrh[1]);
-               __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
-               __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
-               __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
-               __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff;
-               __entry->hdr_len = hdr_len;
-               __entry->data_len = data_len;
-               memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
-               memcpy(__get_dynamic_array(raw_pkt), data, data_len);
-               ),
-           TP_printk(
-               "[%s] " SNOOP_PRN,
-               __get_str(dev),
-               __entry->slid,
-               __entry->dlid,
-               __entry->qpn,
-               __entry->opcode,
-               show_ib_opcode(__entry->opcode),
-               __entry->sl,
-               __entry->pkey,
-               __entry->hdr_len,
-               __entry->data_len
-               )
-);
-
 #endif /* __HFI1_TRACE_RX_H */
 
 #undef TRACE_INCLUDE_PATH
index a761f80..77697d6 100644 (file)
@@ -1144,7 +1144,7 @@ static int pin_vector_pages(struct user_sdma_request *req,
        rb_node = hfi1_mmu_rb_extract(pq->handler,
                                      (unsigned long)iovec->iov.iov_base,
                                      iovec->iov.iov_len);
-       if (rb_node && !IS_ERR(rb_node))
+       if (rb_node)
                node = container_of(rb_node, struct sdma_mmu_node, rb);
        else
                rb_node = NULL;
index 5fc6233..b9bf075 100644 (file)
@@ -102,7 +102,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
        if (vlan_tag < 0x1000)
                vlan_tag |= (ah_attr->sl & 7) << 13;
        ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
-       ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+       ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+       if (ret < 0)
+               return ERR_PTR(ret);
+       ah->av.eth.gid_index = ret;
        ah->av.eth.vlan = cpu_to_be16(vlan_tag);
        ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
        if (ah_attr->static_rate) {
index 1ea686b..6a0fec3 100644 (file)
@@ -253,11 +253,14 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
        if (context)
                if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
                        err = -EFAULT;
-                       goto err_dbmap;
+                       goto err_cq_free;
                }
 
        return &cq->ibcq;
 
+err_cq_free:
+       mlx4_cq_free(dev->dev, &cq->mcq);
+
 err_dbmap:
        if (context)
                mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
index 79d017b..fcd04b8 100644 (file)
@@ -932,8 +932,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
                if (err)
                        goto err_create;
        } else {
-               /* for now choose 64 bytes till we have a proper interface */
-               cqe_size = 64;
+               cqe_size = cache_line_size() == 128 ? 128 : 64;
                err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
                                       &index, &inlen);
                if (err)
index 63036c7..32b09f0 100644 (file)
@@ -2311,14 +2311,14 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 {
        struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
        struct ib_event ibev;
-
+       bool fatal = false;
        u8 port = 0;
 
        switch (event) {
        case MLX5_DEV_EVENT_SYS_ERROR:
-               ibdev->ib_active = false;
                ibev.event = IB_EVENT_DEVICE_FATAL;
                mlx5_ib_handle_internal_error(ibdev);
+               fatal = true;
                break;
 
        case MLX5_DEV_EVENT_PORT_UP:
@@ -2370,6 +2370,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 
        if (ibdev->ib_active)
                ib_dispatch_event(&ibev);
+
+       if (fatal)
+               ibdev->ib_active = false;
 }
 
 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
@@ -3115,7 +3118,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        }
        err = init_node_data(dev);
        if (err)
-               goto err_dealloc;
+               goto err_free_port;
 
        mutex_init(&dev->flow_db.lock);
        mutex_init(&dev->cap_mask_mutex);
@@ -3125,7 +3128,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        if (ll == IB_LINK_LAYER_ETHERNET) {
                err = mlx5_enable_roce(dev);
                if (err)
-                       goto err_dealloc;
+                       goto err_free_port;
        }
 
        err = create_dev_resources(&dev->devr);
index dcdcd19..7d68990 100644 (file)
@@ -626,6 +626,8 @@ struct mlx5_ib_dev {
        struct mlx5_ib_resources        devr;
        struct mlx5_mr_cache            cache;
        struct timer_list               delay_timer;
+       /* Prevents soft lock on massive reg MRs */
+       struct mutex                    slow_path_mutex;
        int                             fill_delay;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        struct ib_odp_caps      odp_caps;
index d4ad672..4e90124 100644 (file)
@@ -610,6 +610,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
        int err;
        int i;
 
+       mutex_init(&dev->slow_path_mutex);
        cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
        if (!cache->wq) {
                mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -1182,9 +1183,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                goto error;
        }
 
-       if (!mr)
+       if (!mr) {
+               mutex_lock(&dev->slow_path_mutex);
                mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
                                page_shift, access_flags);
+               mutex_unlock(&dev->slow_path_mutex);
+       }
 
        if (IS_ERR(mr)) {
                err = PTR_ERR(mr);
index 7ce97da..d1e9218 100644 (file)
@@ -2051,8 +2051,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 
                mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
                            qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
-                           to_mcq(init_attr->recv_cq)->mcq.cqn,
-                           to_mcq(init_attr->send_cq)->mcq.cqn);
+                           init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1,
+                           init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1);
 
                qp->trans_qp.xrcdn = xrcdn;
 
@@ -4814,6 +4814,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
                                 udata->inlen))
                return ERR_PTR(-EOPNOTSUPP);
 
+       if (init_attr->log_ind_tbl_size >
+           MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
+               mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
+                           init_attr->log_ind_tbl_size,
+                           MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
+               return ERR_PTR(-EINVAL);
+       }
+
        min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
        if (udata->outlen && udata->outlen < min_resp_len)
                return ERR_PTR(-EINVAL);
index 01f71ca..f2cefb0 100644 (file)
@@ -90,9 +90,6 @@ static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
        if (WARN_ON(!valid_dma_direction(direction)))
                return BAD_DMA_ADDRESS;
 
-       if (offset + size > PAGE_SIZE)
-               return BAD_DMA_ADDRESS;
-
        addr = (u64)page_address(page);
        if (addr)
                addr += offset;
index b8258e4..ffff5a5 100644 (file)
@@ -243,10 +243,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
 {
        int err;
        struct socket *sock;
-       struct udp_port_cfg udp_cfg;
-       struct udp_tunnel_sock_cfg tnl_cfg;
-
-       memset(&udp_cfg, 0, sizeof(udp_cfg));
+       struct udp_port_cfg udp_cfg = {0};
+       struct udp_tunnel_sock_cfg tnl_cfg = {0};
 
        if (ipv6) {
                udp_cfg.family = AF_INET6;
@@ -264,10 +262,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
                return ERR_PTR(err);
        }
 
-       tnl_cfg.sk_user_data = NULL;
        tnl_cfg.encap_type = 1;
        tnl_cfg.encap_rcv = rxe_udp_encap_recv;
-       tnl_cfg.encap_destroy = NULL;
 
        /* Setup UDP tunnel */
        setup_udp_tunnel_sock(net, sock, &tnl_cfg);
index b8036cf..c3e60e4 100644 (file)
@@ -522,6 +522,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
        if (qp->sq.queue) {
                __rxe_do_task(&qp->comp.task);
                __rxe_do_task(&qp->req.task);
+               rxe_queue_reset(qp->sq.queue);
        }
 
        /* cleanup attributes */
@@ -573,6 +574,7 @@ void rxe_qp_error(struct rxe_qp *qp)
 {
        qp->req.state = QP_STATE_ERROR;
        qp->resp.state = QP_STATE_ERROR;
+       qp->attr.qp_state = IB_QPS_ERR;
 
        /* drain work and packet queues */
        rxe_run_task(&qp->resp.task, 1);
index 0827425..d14bf49 100644 (file)
@@ -84,6 +84,15 @@ err1:
        return -EINVAL;
 }
 
+inline void rxe_queue_reset(struct rxe_queue *q)
+{
+       /* queue is comprised from header and the memory
+        * of the actual queue. See "struct rxe_queue_buf" in rxe_queue.h
+        * reset only the queue itself and not the management header
+        */
+       memset(q->buf->data, 0, q->buf_size - sizeof(struct rxe_queue_buf));
+}
+
 struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
                                 int *num_elem,
                                 unsigned int elem_size)
index 239fd60..8c8641c 100644 (file)
@@ -84,6 +84,8 @@ int do_mmap_info(struct rxe_dev *rxe,
                 size_t buf_size,
                 struct rxe_mmap_info **ip_p);
 
+void rxe_queue_reset(struct rxe_queue *q);
+
 struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
                                 int *num_elem,
                                 unsigned int elem_size);
index 832846b..22bd963 100644 (file)
@@ -696,7 +696,8 @@ next_wqe:
                                                       qp->req.wqe_index);
                        wqe->state = wqe_state_done;
                        wqe->status = IB_WC_SUCCESS;
-                       goto complete;
+                       __rxe_do_task(&qp->comp.task);
+                       return 0;
                }
                payload = mtu;
        }
@@ -745,13 +746,17 @@ err:
        wqe->status = IB_WC_LOC_PROT_ERR;
        wqe->state = wqe_state_error;
 
-complete:
-       if (qp_type(qp) != IB_QPT_RC) {
-               while (rxe_completer(qp) == 0)
-                       ;
-       }
-
-       return 0;
+       /*
+        * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
+        * ---------8<---------8<-------------
+        * ...Note that if a completion error occurs, a Work Completion
+        * will always be generated, even if the signaling
+        * indicator requests an Unsignaled Completion.
+        * ---------8<---------8<-------------
+        */
+       wqe->wr.send_flags |= IB_SEND_SIGNALED;
+       __rxe_do_task(&qp->comp.task);
+       return -EAGAIN;
 
 exit:
        return -EAGAIN;