OSDN Git Service

IB/hfi1: Prioritize the sending of ACK packets
author: Kaike Wan <kaike.wan@intel.com>
Thu, 24 Jan 2019 05:52:19 +0000 (21:52 -0800)
committer: Doug Ledford <dledford@redhat.com>
Tue, 5 Feb 2019 23:07:44 +0000 (18:07 -0500)
ACK packets are generally associated with request completion and resource
release and therefore should be sent first. This patch optimizes the
send engine by using the following policies:
(1) QPs with RVT_S_ACK_PENDING bit set in qp->s_flags or qpriv->s_flags
should have their priority incremented;
(2) QPs with an ACK or TID-ACK packet queued should have their priority
incremented;
(3) When a QP is queued to the wait list due to resource constraints, it
will be queued to the head if it has an ACK packet to send;
(4) When selecting qps to run from the wait list, the one with the highest
priority and starve_cnt will be selected; each priority will be equivalent
to a fixed number of starve_cnt (16).

Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
12 files changed:
drivers/infiniband/hw/hfi1/iowait.c
drivers/infiniband/hw/hfi1/iowait.h
drivers/infiniband/hw/hfi1/pio.c
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/sdma.c
drivers/infiniband/hw/hfi1/sdma_txreq.h
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/user_sdma.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hfi1/verbs_txreq.h
drivers/infiniband/hw/hfi1/vnic_sdma.c

index 582f1ba..adb4a1b 100644 (file)
@@ -6,6 +6,9 @@
 #include "iowait.h"
 #include "trace_iowait.h"
 
+/* 1 priority == 16 starve_cnt */
+#define IOWAIT_PRIORITY_STARVE_SHIFT 4
+
 void iowait_set_flag(struct iowait *wait, u32 flag)
 {
        trace_hfi1_iowait_set(wait, flag);
@@ -44,7 +47,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
                              uint seq,
                              bool pkts_sent),
                 void (*wakeup)(struct iowait *wait, int reason),
-                void (*sdma_drained)(struct iowait *wait))
+                void (*sdma_drained)(struct iowait *wait),
+                void (*init_priority)(struct iowait *wait))
 {
        int i;
 
@@ -58,6 +62,7 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
        wait->sleep = sleep;
        wait->wakeup = wakeup;
        wait->sdma_drained = sdma_drained;
+       wait->init_priority = init_priority;
        wait->flags = 0;
        for (i = 0; i < IOWAIT_SES; i++) {
                wait->wait[i].iow = wait;
@@ -92,3 +97,30 @@ int iowait_set_work_flag(struct iowait_work *w)
        iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
        return IOWAIT_TID_SE;
 }
+
+/**
+ * iowait_priority_update_top - update the top priority entry
+ * @w: the iowait struct
+ * @top: a pointer to the top priority entry
+ * @idx: the index of the current iowait in an array
+ * @top_idx: the array index for the iowait entry that has the top priority
+ *
+ * Compare the combined weight of @w against that of @top, where the
+ * weight is (priority << IOWAIT_PRIORITY_STARVE_SHIFT) + starved_cnt.
+ * On a tie the current top entry keeps its place.
+ *
+ * Return: @idx if @w outweighs @top, otherwise @top_idx.
+ */
+uint iowait_priority_update_top(struct iowait *w,
+                               struct iowait *top,
+                               uint idx, uint top_idx)
+{
+       uint cnt, tcnt;
+
+       /*
+        * Convert priority into starve_cnt and compare the total.
+        * The total is kept in a uint: priority and starved_cnt are
+        * both u8, so the sum can exceed 255 and would wrap (inverting
+        * the comparison) if it were stored back into a u8.
+        */
+       cnt = ((uint)w->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
+               w->starved_cnt;
+       tcnt = ((uint)top->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
+               top->starved_cnt;
+
+       return cnt > tcnt ? idx : top_idx;
+}
index bd91370..07847cb 100644 (file)
@@ -100,6 +100,7 @@ struct iowait_work {
  * @sleep: no space callback
  * @wakeup: space callback wakeup
  * @sdma_drained: sdma count drained
+ * @init_priority: callback to manipulate priority
  * @lock: lock protected head of wait queue
  * @iowork: workqueue overhead
  * @wait_dma: wait for sdma_busy == 0
@@ -109,7 +110,7 @@ struct iowait_work {
  * @tx_limit: limit for overflow queuing
  * @tx_count: number of tx entry's in tx_head'ed list
  * @flags: wait flags (one per QP)
- * @wait: SE array
+ * @wait: SE array for multiple legs
  *
  * This is to be embedded in user's state structure
  * (QP or PQ).
@@ -120,10 +121,13 @@ struct iowait_work {
  * are callbacks for the ULP to implement
  * what ever queuing/dequeuing of
  * the embedded iowait and its containing struct
- * when a resource shortage like SDMA ring space is seen.
+ * when a resource shortage like SDMA ring space
+ * or PIO credit space is seen.
  *
  * Both potentially have locks help
- * so sleeping is not allowed.
+ * so sleeping is not allowed and it is not
+ * supported to submit txreqs from the wakeup
+ * call directly because of lock conflicts.
  *
  * The wait_dma member along with the iow
  *
@@ -143,6 +147,7 @@ struct iowait {
                );
        void (*wakeup)(struct iowait *wait, int reason);
        void (*sdma_drained)(struct iowait *wait);
+       void (*init_priority)(struct iowait *wait);
        seqlock_t *lock;
        wait_queue_head_t wait_dma;
        wait_queue_head_t wait_pio;
@@ -152,6 +157,7 @@ struct iowait {
        u32 tx_limit;
        u32 tx_count;
        u8 starved_cnt;
+       u8 priority;
        unsigned long flags;
        struct iowait_work wait[IOWAIT_SES];
 };
@@ -171,7 +177,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
                              uint seq,
                              bool pkts_sent),
                 void (*wakeup)(struct iowait *wait, int reason),
-                void (*sdma_drained)(struct iowait *wait));
+                void (*sdma_drained)(struct iowait *wait),
+                void (*init_priority)(struct iowait *wait));
 
 /**
  * iowait_schedule() - schedule the default send engine work
@@ -339,6 +346,8 @@ static inline u16 iowait_get_desc(struct iowait_work *w)
                tx = list_first_entry(&w->tx_head, struct sdma_txreq,
                                      list);
                num_desc = tx->num_desc;
+               if (tx->flags & SDMA_TXREQ_F_VIP)
+                       w->iow->priority++;
        }
        return num_desc;
 }
@@ -352,6 +361,37 @@ static inline u32 iowait_get_all_desc(struct iowait *w)
        return num_desc;
 }
 
+/*
+ * Bump the owning iowait's priority by one when the first txreq queued
+ * on this work entry is flagged SDMA_TXREQ_F_VIP.  An empty tx_head
+ * list leaves the priority untouched.
+ */
+static inline void iowait_update_priority(struct iowait_work *w)
+{
+       struct sdma_txreq *head;
+
+       if (list_empty(&w->tx_head))
+               return;
+
+       head = list_first_entry(&w->tx_head, struct sdma_txreq, list);
+       if (head->flags & SDMA_TXREQ_F_VIP)
+               w->iow->priority++;
+}
+
+/*
+ * Apply iowait_update_priority() to both the IOWAIT_IB_SE and the
+ * IOWAIT_TID_SE work entries of @w.
+ */
+static inline void iowait_update_all_priority(struct iowait *w)
+{
+       iowait_update_priority(&w->wait[IOWAIT_IB_SE]);
+       iowait_update_priority(&w->wait[IOWAIT_TID_SE]);
+}
+
+/*
+ * Reset the priority of @w to zero, then give the optional
+ * init_priority callback a chance to adjust it.
+ */
+static inline void iowait_init_priority(struct iowait *w)
+{
+       w->priority = 0;
+
+       /* The callback is optional; some users register NULL. */
+       if (!w->init_priority)
+               return;
+
+       w->init_priority(w);
+}
+
+/*
+ * Recompute the priority of @w from scratch: reset it through
+ * iowait_init_priority(), then account for the queued txreqs through
+ * iowait_update_all_priority().  Nothing is returned; the result is
+ * left in w->priority.
+ */
+static inline void iowait_get_priority(struct iowait *w)
+{
+       iowait_init_priority(w);
+       iowait_update_all_priority(w);
+}
+
 /**
  * iowait_queue - Put the iowait on a wait queue
  * @pkts_sent: have some packets been sent before queuing?
@@ -368,14 +408,18 @@ static inline void iowait_queue(bool pkts_sent, struct iowait *w,
        /*
         * To play fair, insert the iowait at the tail of the wait queue if it
         * has already sent some packets; Otherwise, put it at the head.
+        * However, if it has priority packets to send, also put it at the
+        * head.
         */
-       if (pkts_sent) {
-               list_add_tail(&w->list, wait_head);
+       if (pkts_sent)
                w->starved_cnt = 0;
-       } else {
-               list_add(&w->list, wait_head);
+       else
                w->starved_cnt++;
-       }
+
+       if (w->priority > 0 || !pkts_sent)
+               list_add(&w->list, wait_head);
+       else
+               list_add_tail(&w->list, wait_head);
 }
 
 /**
@@ -392,27 +436,10 @@ static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
                w->starved_cnt = 0;
 }
 
-/**
- * iowait_starve_find_max - Find the maximum of the starve count
- * @w: the iowait struct
- * @max: a variable containing the max starve count
- * @idx: the index of the current iowait in an array
- * @max_idx: a variable containing the array index for the
- *         iowait entry that has the max starve count
- *
- * This function is called to compare the starve count of a
- * given iowait with the given max starve count. The max starve
- * count and the index will be updated if the iowait's start
- * count is larger.
- */
-static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
-                                         uint idx, uint *max_idx)
-{
-       if (w->starved_cnt > *max) {
-               *max = w->starved_cnt;
-               *max_idx = idx;
-       }
-}
+/* Update the top priority index */
+uint iowait_priority_update_top(struct iowait *w,
+                               struct iowait *top,
+                               uint idx, uint top_idx);
 
 /**
  * iowait_packet_queued() - determine if a packet is queued
index 04126d7..a1de566 100644 (file)
@@ -1599,8 +1599,7 @@ static void sc_piobufavail(struct send_context *sc)
        struct rvt_qp *qp;
        struct hfi1_qp_priv *priv;
        unsigned long flags;
-       uint i, n = 0, max_idx = 0;
-       u8 max_starved_cnt = 0;
+       uint i, n = 0, top_idx = 0;
 
        if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
            dd->send_contexts[sc->sw_index].type != SC_VL15)
@@ -1619,11 +1618,18 @@ static void sc_piobufavail(struct send_context *sc)
                if (n == ARRAY_SIZE(qps))
                        break;
                wait = list_first_entry(list, struct iowait, list);
+               iowait_get_priority(wait);
                qp = iowait_to_qp(wait);
                priv = qp->priv;
                list_del_init(&priv->s_iowait.list);
                priv->s_iowait.lock = NULL;
-               iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
+               if (n) {
+                       priv = qps[top_idx]->priv;
+                       top_idx = iowait_priority_update_top(wait,
+                                                            &priv->s_iowait,
+                                                            n, top_idx);
+               }
+
                /* refcount held until actual wake up */
                qps[n++] = qp;
        }
@@ -1638,12 +1644,12 @@ static void sc_piobufavail(struct send_context *sc)
        }
        write_sequnlock_irqrestore(&sc->waitlock, flags);
 
-       /* Wake up the most starved one first */
+       /* Wake up the top-priority one first */
        if (n)
-               hfi1_qp_wakeup(qps[max_idx],
+               hfi1_qp_wakeup(qps[top_idx],
                               RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
        for (i = 0; i < n; i++)
-               if (i != max_idx)
+               if (i != top_idx)
                        hfi1_qp_wakeup(qps[i],
                                       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
 }
index cfd598e..d8f7add 100644 (file)
@@ -518,6 +518,7 @@ static int iowait_sleep(
 
                        ibp->rvp.n_dmawait++;
                        qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+                       iowait_get_priority(&priv->s_iowait);
                        iowait_queue(pkts_sent, &priv->s_iowait,
                                     &sde->dmawait);
                        priv->s_iowait.lock = &sde->waitlock;
@@ -567,6 +568,17 @@ static void iowait_sdma_drained(struct iowait *wait)
        spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
+/*
+ * init_priority callback registered from qp_priv_alloc(): raise the
+ * iowait priority by one for each of qp->s_flags and priv->s_flags
+ * that has RVT_S_ACK_PENDING set.
+ */
+static void hfi1_init_priority(struct iowait *w)
+{
+       struct rvt_qp *qp = iowait_to_qp(w);
+       struct hfi1_qp_priv *qpriv = qp->priv;
+
+       w->priority += !!(qp->s_flags & RVT_S_ACK_PENDING);
+       w->priority += !!(qpriv->s_flags & RVT_S_ACK_PENDING);
+}
+
 /**
  * qp_to_sdma_engine - map a qp to a send engine
  * @qp: the QP
@@ -727,7 +739,8 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
                _hfi1_do_tid_send,
                iowait_sleep,
                iowait_wakeup,
-               iowait_sdma_drained);
+               iowait_sdma_drained,
+               hfi1_init_priority);
        return priv;
 }
 
index 82afa77..e6726c1 100644 (file)
@@ -390,6 +390,7 @@ normal_no_state:
                bth0 = OP(ACKNOWLEDGE) << 24;
                bth2 = mask_psn(qp->s_ack_psn);
                qp->s_flags &= ~RVT_S_ACK_PENDING;
+               ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
                ps->s_txreq->ss = NULL;
        }
        qp->s_rdma_ack_cnt++;
index 96897a9..b011072 100644 (file)
@@ -1747,10 +1747,9 @@ retry:
  */
 static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 {
-       struct iowait *wait, *nw;
+       struct iowait *wait, *nw, *twait;
        struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
-       uint i, n = 0, seq, max_idx = 0;
-       u8 max_starved_cnt = 0;
+       uint i, n = 0, seq, tidx = 0;
 
 #ifdef CONFIG_SDMA_VERBOSITY
        dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@@ -1775,13 +1774,20 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
                                        continue;
                                if (n == ARRAY_SIZE(waits))
                                        break;
+                               iowait_init_priority(wait);
                                num_desc = iowait_get_all_desc(wait);
                                if (num_desc > avail)
                                        break;
                                avail -= num_desc;
-                               /* Find the most starved wait memeber */
-                               iowait_starve_find_max(wait, &max_starved_cnt,
-                                                      n, &max_idx);
+                               /* Find the top-priority wait member */
+                               if (n) {
+                                       twait = waits[tidx];
+                                       tidx =
+                                           iowait_priority_update_top(wait,
+                                                                      twait,
+                                                                      n,
+                                                                      tidx);
+                               }
                                list_del_init(&wait->list);
                                waits[n++] = wait;
                        }
@@ -1790,12 +1796,12 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
                }
        } while (read_seqretry(&sde->waitlock, seq));
 
-       /* Schedule the most starved one first */
+       /* Schedule the top-priority entry first */
        if (n)
-               waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
+               waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);
 
        for (i = 0; i < n; i++)
-               if (i != max_idx)
+               if (i != tidx)
                        waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
 }
 
index bf7d777..514a478 100644 (file)
@@ -91,6 +91,7 @@ struct sdma_desc {
 #define SDMA_TXREQ_F_URGENT       0x0001
 #define SDMA_TXREQ_F_AHG_COPY     0x0002
 #define SDMA_TXREQ_F_USE_AHG      0x0004
+#define SDMA_TXREQ_F_VIP          0x0010
 
 struct sdma_txreq;
 typedef void (*callback_t)(struct sdma_txreq *, int);
index a49eb3d..bc2ff83 100644 (file)
@@ -5296,6 +5296,7 @@ static int make_tid_rdma_ack(struct rvt_qp *qp,
        ps->s_txreq->ss = NULL;
        hfi1_make_ruc_header(qp, ohdr, (TID_OP(ACK) << 24), bth1, bth2, middle,
                             ps);
+       ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
        return 1;
 bail:
        /*
index 6764114..8bfbc6d 100644 (file)
@@ -144,8 +144,10 @@ static int defer_packet_queue(
         */
        xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
        write_seqlock(&sde->waitlock);
-       if (list_empty(&pq->busy.list))
+       if (list_empty(&pq->busy.list)) {
+               iowait_get_priority(&pq->busy);
                iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
+       }
        write_sequnlock(&sde->waitlock);
        return -EBUSY;
 eagain:
@@ -191,7 +193,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
        pq->mm = fd->mm;
 
        iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
-                   activate_packet_queue, NULL);
+                   activate_packet_queue, NULL, NULL);
        pq->reqidx = 0;
 
        pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
index ab97d71..55a56b3 100644 (file)
@@ -945,6 +945,7 @@ static int pio_wait(struct rvt_qp *qp,
                        dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
                        qp->s_flags |= flag;
                        was_empty = list_empty(&sc->piowait);
+                       iowait_get_priority(&priv->s_iowait);
                        iowait_queue(ps->pkts_sent, &priv->s_iowait,
                                     &sc->piowait);
                        priv->s_iowait.lock = &sc->waitlock;
index 2a77af2..b002e96 100644 (file)
@@ -94,6 +94,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
        tx->txreq.num_desc = 0;
        /* Set the header type */
        tx->phdr.hdr.hdr_type = priv->hdr_type;
+       tx->txreq.flags = 0;
        return tx;
 }
 
index 1f81c48..af1b1ff 100644 (file)
@@ -240,8 +240,10 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
        }
 
        vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
-       if (list_empty(&vnic_sdma->wait.list))
+       if (list_empty(&vnic_sdma->wait.list)) {
+               iowait_get_priority(wait->iow);
                iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
+       }
        write_sequnlock(&sde->waitlock);
        return -EBUSY;
 }
@@ -281,7 +283,7 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
 
                iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
                            hfi1_vnic_sdma_sleep,
-                           hfi1_vnic_sdma_wakeup, NULL);
+                           hfi1_vnic_sdma_wakeup, NULL, NULL);
                vnic_sdma->sde = &vinfo->dd->per_sdma[i];
                vnic_sdma->dd = vinfo->dd;
                vnic_sdma->vinfo = vinfo;