OSDN Git Service

IB/hfi1: TID RDMA flow allocation
authorKaike Wan <kaike.wan@intel.com>
Tue, 5 Feb 2019 22:13:13 +0000 (14:13 -0800)
committerDoug Ledford <dledford@redhat.com>
Tue, 5 Feb 2019 22:53:54 +0000 (17:53 -0500)
The hfi1 hardware flow is a hardware flow-control mechanism for a KDETH
data packet that is received on a hfi1 port. It validates the packet by
checking both the generation and sequence. Each QP that uses the TID RDMA
mechanism will allocate a hardware flow from its receiving context for
any incoming KDETH data packets.

This patch implements:
(1) a function to allocate hardware flow
(2) a function to free hardware flow
(3) a function to initialize hardware flow generation for a receiving
    context
(4) a wait mechanism if the hardware flow is not available
(5) a function to remove the qp from the wait queue for hardware flow
    when the qp is reset or destroyed.

Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/common.h
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/qp.h
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/tid_rdma.h
drivers/infiniband/hw/hfi1/verbs.h

index 40d3cfb..7310a5d 100644 (file)
@@ -340,6 +340,10 @@ struct diag_pkt {
 
 #define HFI1_PSM_IOC_BASE_SEQ 0x0
 
+/* Number of BTH.PSN bits used for sequence number in expected rcvs */
+#define HFI1_KDETH_BTH_SEQ_SHIFT 11
+#define HFI1_KDETH_BTH_SEQ_MASK (BIT(HFI1_KDETH_BTH_SEQ_SHIFT) - 1)
+
 static inline __u64 rhf_to_cpu(const __le32 *rbuf)
 {
        return __le64_to_cpu(*((__le64 *)rbuf));
index 9aa0357..78aa344 100644 (file)
@@ -198,6 +198,14 @@ struct exp_tid_set {
 };
 
 typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
+
+struct tid_queue {
+       struct list_head queue_head;
+                       /* queue head for QP TID resource waiters */
+       u32 enqueue;    /* count of tid enqueues */
+       u32 dequeue;    /* count of tid dequeues */
+};
+
 struct hfi1_ctxtdata {
        /* rcvhdrq base, needs mmap before useful */
        void *rcvhdrq;
@@ -291,6 +299,10 @@ struct hfi1_ctxtdata {
        /* PSM Specific fields */
        /* lock protecting all Expected TID data */
        struct mutex exp_mutex;
+       /* lock protecting all Expected TID data of kernel contexts */
+       spinlock_t exp_lock;
+       /* Queue for QP's waiting for HW TID flows */
+       struct tid_queue flow_queue;
        /* when waiting for rcv or pioavail */
        wait_queue_head_t wait;
        /* uuid from PSM */
@@ -323,6 +335,9 @@ struct hfi1_ctxtdata {
         */
        u8 subctxt_cnt;
 
+       /* Bit mask to track free TID RDMA HW flows */
+       unsigned long flow_mask;
+       struct tid_flow_state flows[RXE_NUM_TID_FLOWS];
 };
 
 /**
index a8dbd0f..56830a5 100644 (file)
@@ -370,6 +370,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
                rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
 
                mutex_init(&rcd->exp_mutex);
+               spin_lock_init(&rcd->exp_lock);
+               INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
 
                hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
 
@@ -472,6 +474,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
                                                    GFP_KERNEL, numa);
                        if (!rcd->opstats)
                                goto bail;
+
+                       /* Initialize TID flow generations for the context */
+                       hfi1_kern_init_ctxt_generations(rcd);
                }
 
                *context = rcd;
@@ -771,6 +776,8 @@ static void enable_chip(struct hfi1_devdata *dd)
                        rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
                if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
                        rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+               if (HFI1_CAP_IS_KSET(TID_RDMA))
+                       rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
                hfi1_rcvctrl(dd, rcvmask, rcd);
                sc_enable(rcd->sc);
                hfi1_rcd_put(rcd);
index f822f92..69c38af 100644 (file)
@@ -738,6 +738,7 @@ void flush_qp_waiters(struct rvt_qp *qp)
 {
        lockdep_assert_held(&qp->s_lock);
        flush_iowait(qp);
+       hfi1_tid_rdma_flush_wait(qp);
 }
 
 void stop_send_queue(struct rvt_qp *qp)
@@ -745,6 +746,8 @@ void stop_send_queue(struct rvt_qp *qp)
        struct hfi1_qp_priv *priv = qp->priv;
 
        iowait_cancel_work(&priv->s_iowait);
+       if (cancel_work_sync(&priv->tid_rdma.trigger_work))
+               rvt_put_qp(qp);
 }
 
 void quiesce_qp(struct rvt_qp *qp)
index 7adb6df..ce25a27 100644 (file)
@@ -63,11 +63,13 @@ extern const struct rvt_operation_params hfi1_post_parms[];
  * HFI1_S_AHG_VALID - ahg header valid on chip
  * HFI1_S_AHG_CLEAR - have send engine clear ahg state
  * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
+ * HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
  * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
  */
 #define HFI1_S_AHG_VALID         0x80000000
 #define HFI1_S_AHG_CLEAR         0x40000000
 #define HFI1_S_WAIT_PIO_DRAIN    0x20000000
+#define HFI1_S_WAIT_TID_SPACE    0x10000000
 #define HFI1_S_MIN_BIT_MASK      0x01000000
 
 /*
index e8f57c0..7067121 100644 (file)
@@ -5,10 +5,28 @@
  */
 
 #include "hfi.h"
+#include "qp.h"
 #include "verbs.h"
 #include "tid_rdma.h"
 #include "trace.h"
 
+#define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
+#define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
+#define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
+#define RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK BIT_ULL(35)
+#define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
+#define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)
+
+#define GENERATION_MASK 0xFFFFF
+
+static u32 mask_generation(u32 a)
+{
+       return a & GENERATION_MASK;
+}
+
+/* Reserved generation value to set to unused flows for kernel contexts */
+#define KERN_GENERATION_RESERVED mask_generation(U32_MAX)
+
 /*
  * J_KEY for kernel contexts when TID RDMA is used.
  * See generate_jkey() in hfi.h for more information.
@@ -60,6 +78,8 @@
  * C - Capcode
  */
 
+static void tid_rdma_trigger_resume(struct work_struct *work);
+
 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
 {
        return
@@ -251,6 +271,12 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 
        spin_lock_init(&qpriv->opfn.lock);
        INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
+       INIT_WORK(&qpriv->tid_rdma.trigger_work, tid_rdma_trigger_resume);
+       qpriv->flow_state.psn = 0;
+       qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
+       qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
+       qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
+       INIT_LIST_HEAD(&qpriv->tid_wait);
 
        return 0;
 }
@@ -262,3 +288,417 @@ void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
        if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA))
                cancel_work_sync(&priv->opfn.opfn_work);
 }
+
+/* Flow and tid waiter functions */
+/**
+ * DOC: lock ordering
+ *
+ * There are two locks involved with the queuing
+ * routines: the qp s_lock and the exp_lock.
+ *
+ * Since the tid space allocation is called from
+ * the send engine, the qp s_lock is already held.
+ *
+ * The allocation routines will get the exp_lock.
+ *
+ * The first_qp() call is provided to allow the head of
+ * the rcd wait queue to be fetched under the exp_lock and
+ * followed by a drop of the exp_lock.
+ *
+ * Any qp in the wait list will have the qp reference count held
+ * to hold the qp in memory.
+ */
+
+/*
+ * return head of rcd wait list
+ *
+ * Must hold the exp_lock.
+ *
+ * Get a reference to the QP to hold the QP in memory.
+ *
+ * The caller must release the reference when the returned
+ * qp pointer is no longer being used.
+ */
+static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
+                              struct tid_queue *queue)
+       __must_hold(&rcd->exp_lock)
+{
+       struct hfi1_qp_priv *priv;
+
+       lockdep_assert_held(&rcd->exp_lock);
+       priv = list_first_entry_or_null(&queue->queue_head,
+                                       struct hfi1_qp_priv,
+                                       tid_wait);
+       if (!priv)
+               return NULL;
+       rvt_get_qp(priv->owner);
+       return priv->owner;
+}
+
+/**
+ * kernel_tid_waiters - determine rcd wait
+ * @rcd: the receive context
+ * @qp: the head of the qp being processed
+ *
+ * This routine will return false IFF
+ * the list is empty or the head of the
+ * list is the indicated qp.
+ *
+ * Must hold the qp s_lock and the exp_lock.
+ *
+ * Return:
+ * false if either of the conditions below are satisfied:
+ * 1. The list is empty or
+ * 2. The indicated qp is at the head of the list and the
+ *    HFI1_S_WAIT_TID_SPACE bit is set in qp->s_flags.
+ * true is returned otherwise.
+ */
+static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
+                              struct tid_queue *queue, struct rvt_qp *qp)
+       __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
+{
+       struct rvt_qp *fqp;
+       bool ret = true;
+
+       lockdep_assert_held(&qp->s_lock);
+       lockdep_assert_held(&rcd->exp_lock);
+       fqp = first_qp(rcd, queue);
+       if (!fqp || (fqp == qp && (qp->s_flags & HFI1_S_WAIT_TID_SPACE)))
+               ret = false;
+       rvt_put_qp(fqp);
+       return ret;
+}
+
+/**
+ * dequeue_tid_waiter - dequeue the qp from the list
+ * @qp - the qp to remove from the wait list
+ *
+ * This routine removes the indicated qp from the
+ * wait list if it is there.
+ *
+ * This should be done after the hardware flow and
+ * tid array resources have been allocated.
+ *
+ * Must hold the qp s_lock and the rcd exp_lock.
+ *
+ * It assumes the s_lock to protect the s_flags
+ * field and to reliably test the HFI1_S_WAIT_TID_SPACE flag.
+ */
+static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
+                              struct tid_queue *queue, struct rvt_qp *qp)
+       __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+
+       lockdep_assert_held(&qp->s_lock);
+       lockdep_assert_held(&rcd->exp_lock);
+       if (list_empty(&priv->tid_wait))
+               return;
+       list_del_init(&priv->tid_wait);
+       qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
+       queue->dequeue++;
+       rvt_put_qp(qp);
+}
+
+/**
+ * queue_qp_for_tid_wait - suspend QP on tid space
+ * @rcd: the receive context
+ * @qp: the qp
+ *
+ * The qp is inserted at the tail of the rcd
+ * wait queue and the HFI1_S_WAIT_TID_SPACE s_flag is set.
+ *
+ * Must hold the qp s_lock and the exp_lock.
+ */
+static void queue_qp_for_tid_wait(struct hfi1_ctxtdata *rcd,
+                                 struct tid_queue *queue, struct rvt_qp *qp)
+       __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+
+       lockdep_assert_held(&qp->s_lock);
+       lockdep_assert_held(&rcd->exp_lock);
+       if (list_empty(&priv->tid_wait)) {
+               qp->s_flags |= HFI1_S_WAIT_TID_SPACE;
+               list_add_tail(&priv->tid_wait, &queue->queue_head);
+               priv->tid_enqueue = ++queue->enqueue;
+               trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TID_SPACE);
+               rvt_get_qp(qp);
+       }
+}
+
+/**
+ * __trigger_tid_waiter - trigger tid waiter
+ * @qp: the qp
+ *
+ * This is a private entry point to schedule the qp
+ * assuming the caller is holding the qp->s_lock.
+ */
+static void __trigger_tid_waiter(struct rvt_qp *qp)
+       __must_hold(&qp->s_lock)
+{
+       lockdep_assert_held(&qp->s_lock);
+       if (!(qp->s_flags & HFI1_S_WAIT_TID_SPACE))
+               return;
+       trace_hfi1_qpwakeup(qp, HFI1_S_WAIT_TID_SPACE);
+       hfi1_schedule_send(qp);
+}
+
+/**
+ * tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp
+ * @qp - the qp
+ *
+ * trigger a schedule for a waiting qp in a deadlock
+ * safe manner.  The qp reference is held prior
+ * to this call via first_qp().
+ *
+ * If the qp trigger was already scheduled (!rval)
+ * then the reference is dropped, otherwise the resume
+ * or the destroy cancel will dispatch the reference.
+ */
+static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *priv;
+       struct hfi1_ibport *ibp;
+       struct hfi1_pportdata *ppd;
+       struct hfi1_devdata *dd;
+       bool rval;
+
+       if (!qp)
+               return;
+
+       priv = qp->priv;
+       ibp = to_iport(qp->ibqp.device, qp->port_num);
+       ppd = ppd_from_ibp(ibp);
+       dd = dd_from_ibdev(qp->ibqp.device);
+
+       rval = queue_work_on(priv->s_sde ?
+                            priv->s_sde->cpu :
+                            cpumask_first(cpumask_of_node(dd->node)),
+                            ppd->hfi1_wq,
+                            &priv->tid_rdma.trigger_work);
+       if (!rval)
+               rvt_put_qp(qp);
+}
+
+/**
+ * tid_rdma_trigger_resume - field a trigger work request
+ * @work - the work item
+ *
+ * Complete the off qp trigger processing by directly
+ * calling the progress routine.
+ */
+static void tid_rdma_trigger_resume(struct work_struct *work)
+{
+       struct tid_rdma_qp_params *tr;
+       struct hfi1_qp_priv *priv;
+       struct rvt_qp *qp;
+
+       tr = container_of(work, struct tid_rdma_qp_params, trigger_work);
+       priv = container_of(tr, struct hfi1_qp_priv, tid_rdma);
+       qp = priv->owner;
+       spin_lock_irq(&qp->s_lock);
+       if (qp->s_flags & HFI1_S_WAIT_TID_SPACE) {
+               spin_unlock_irq(&qp->s_lock);
+               hfi1_do_send(priv->owner, true);
+       } else {
+               spin_unlock_irq(&qp->s_lock);
+       }
+       rvt_put_qp(qp);
+}
+
+/**
+ * _tid_rdma_flush_wait - unwind any tid space wait
+ *
+ * This is called when resetting a qp to
+ * allow a destroy or reset to get rid
+ * of any tid space linkage and reference counts.
+ */
+static void _tid_rdma_flush_wait(struct rvt_qp *qp, struct tid_queue *queue)
+       __must_hold(&qp->s_lock)
+{
+       struct hfi1_qp_priv *priv;
+
+       if (!qp)
+               return;
+       lockdep_assert_held(&qp->s_lock);
+       priv = qp->priv;
+       qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
+       spin_lock(&priv->rcd->exp_lock);
+       if (!list_empty(&priv->tid_wait)) {
+               list_del_init(&priv->tid_wait);
+               qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
+               queue->dequeue++;
+               rvt_put_qp(qp);
+       }
+       spin_unlock(&priv->rcd->exp_lock);
+}
+
+void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
+       __must_hold(&qp->s_lock)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+
+       _tid_rdma_flush_wait(qp, &priv->rcd->flow_queue);
+}
+
+/* Flow functions */
+/**
+ * kern_reserve_flow - allocate a hardware flow
+ * @rcd - the context to use for allocation
+ * @last - the index of the preferred flow. Use RXE_NUM_TID_FLOWS to
+ *         signify "don't care".
+ *
+ * Use a bit mask based allocation to reserve a hardware
+ * flow for use in receiving KDETH data packets. If a preferred flow is
+ * specified the function will attempt to reserve that flow again, if
+ * available.
+ *
+ * The exp_lock must be held.
+ *
+ * Return:
+ * On success: a value between 0 and RXE_NUM_TID_FLOWS - 1
+ * On failure: -EAGAIN
+ */
+static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
+       __must_hold(&rcd->exp_lock)
+{
+       int nr;
+
+       /* Attempt to reserve the preferred flow index */
+       if (last >= 0 && last < RXE_NUM_TID_FLOWS &&
+           !test_and_set_bit(last, &rcd->flow_mask))
+               return last;
+
+       nr = ffz(rcd->flow_mask);
+       BUILD_BUG_ON(RXE_NUM_TID_FLOWS >=
+                    (sizeof(rcd->flow_mask) * BITS_PER_BYTE));
+       if (nr > (RXE_NUM_TID_FLOWS - 1))
+               return -EAGAIN;
+       set_bit(nr, &rcd->flow_mask);
+       return nr;
+}
+
+static void kern_set_hw_flow(struct hfi1_ctxtdata *rcd, u32 generation,
+                            u32 flow_idx)
+{
+       u64 reg;
+
+       reg = ((u64)generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
+               RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK |
+               RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK |
+               RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK |
+               RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK |
+               RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK;
+
+       if (generation != KERN_GENERATION_RESERVED)
+               reg |= RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK;
+
+       write_uctxt_csr(rcd->dd, rcd->ctxt,
+                       RCV_TID_FLOW_TABLE + 8 * flow_idx, reg);
+}
+
+static u32 kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
+       __must_hold(&rcd->exp_lock)
+{
+       u32 generation = rcd->flows[flow_idx].generation;
+
+       kern_set_hw_flow(rcd, generation, flow_idx);
+       return generation;
+}
+
+static u32 kern_flow_generation_next(u32 gen)
+{
+       u32 generation = mask_generation(gen + 1);
+
+       if (generation == KERN_GENERATION_RESERVED)
+               generation = mask_generation(generation + 1);
+       return generation;
+}
+
+static void kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
+       __must_hold(&rcd->exp_lock)
+{
+       rcd->flows[flow_idx].generation =
+               kern_flow_generation_next(rcd->flows[flow_idx].generation);
+       kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, flow_idx);
+}
+
+int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
+       struct tid_flow_state *fs = &qpriv->flow_state;
+       struct rvt_qp *fqp;
+       unsigned long flags;
+       int ret = 0;
+
+       /* The QP already has an allocated flow */
+       if (fs->index != RXE_NUM_TID_FLOWS)
+               return ret;
+
+       spin_lock_irqsave(&rcd->exp_lock, flags);
+       if (kernel_tid_waiters(rcd, &rcd->flow_queue, qp))
+               goto queue;
+
+       ret = kern_reserve_flow(rcd, fs->last_index);
+       if (ret < 0)
+               goto queue;
+       fs->index = ret;
+       fs->last_index = fs->index;
+
+       /* Generation received in a RESYNC overrides default flow generation */
+       if (fs->generation != KERN_GENERATION_RESERVED)
+               rcd->flows[fs->index].generation = fs->generation;
+       fs->generation = kern_setup_hw_flow(rcd, fs->index);
+       fs->psn = 0;
+       fs->flags = 0;
+       dequeue_tid_waiter(rcd, &rcd->flow_queue, qp);
+       /* get head before dropping lock */
+       fqp = first_qp(rcd, &rcd->flow_queue);
+       spin_unlock_irqrestore(&rcd->exp_lock, flags);
+
+       tid_rdma_schedule_tid_wakeup(fqp);
+       return 0;
+queue:
+       queue_qp_for_tid_wait(rcd, &rcd->flow_queue, qp);
+       spin_unlock_irqrestore(&rcd->exp_lock, flags);
+       return -EAGAIN;
+}
+
+void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
+       struct tid_flow_state *fs = &qpriv->flow_state;
+       struct rvt_qp *fqp;
+       unsigned long flags;
+
+       if (fs->index >= RXE_NUM_TID_FLOWS)
+               return;
+       spin_lock_irqsave(&rcd->exp_lock, flags);
+       kern_clear_hw_flow(rcd, fs->index);
+       clear_bit(fs->index, &rcd->flow_mask);
+       fs->index = RXE_NUM_TID_FLOWS;
+       fs->psn = 0;
+       fs->generation = KERN_GENERATION_RESERVED;
+
+       /* get head before dropping lock */
+       fqp = first_qp(rcd, &rcd->flow_queue);
+       spin_unlock_irqrestore(&rcd->exp_lock, flags);
+
+       if (fqp == qp) {
+               __trigger_tid_waiter(fqp);
+               rvt_put_qp(fqp);
+       } else {
+               tid_rdma_schedule_tid_wakeup(fqp);
+       }
+}
+
+void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
+{
+       int i;
+
+       for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
+               rcd->flows[i].generation = mask_generation(prandom_u32());
+               kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
+       }
+}
index ee81515..3bc0aaf 100644 (file)
@@ -21,10 +21,21 @@ struct tid_rdma_params {
 };
 
 struct tid_rdma_qp_params {
+       struct work_struct trigger_work;
        struct tid_rdma_params local;
        struct tid_rdma_params __rcu *remote;
 };
 
+/* Track state for each hardware flow */
+struct tid_flow_state {
+       u32 generation;
+       u32 psn;
+       u32 r_next_psn;      /* next PSN to be received (in TID space) */
+       u8 index;
+       u8 last_index;
+       u8 flags;
+};
+
 bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
 bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
 bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
@@ -37,4 +48,10 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                      struct ib_qp_init_attr *init_attr);
 void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
 
+void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp);
+
+int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
+void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
+void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd);
+
 #endif /* HFI1_TID_RDMA_H */
index c8baa1e..9065e47 100644 (file)
@@ -159,9 +159,12 @@ struct hfi1_qp_priv {
        struct sdma_engine *s_sde;                /* current sde */
        struct send_context *s_sendcontext;       /* current sendcontext */
        struct hfi1_ctxtdata *rcd;                /* QP's receive context */
+       u32 tid_enqueue;                          /* saved when tid waited */
        u8 s_sc;                                  /* SC[0..4] for next packet */
        struct iowait s_iowait;
+       struct list_head tid_wait;                /* for queueing tid space */
        struct hfi1_opfn_data opfn;
+       struct tid_flow_state flow_state;
        struct tid_rdma_qp_params tid_rdma;
        struct rvt_qp *owner;
        u8 hdr_type; /* 9B or 16B */