blk-mq: dequeue request one by one from sw queue if hctx is busy

author Ming Lei <ming.lei@redhat.com>

Tue, 3 Jul 2018 15:03:16 +0000 (09:03 -0600)

committer Jens Axboe <axboe@kernel.dk>

Mon, 9 Jul 2018 15:07:53 +0000 (09:07 -0600)
author Ming Lei <ming.lei@redhat.com>
Tue, 3 Jul 2018 15:03:16 +0000 (09:03 -0600)
committer Jens Axboe <axboe@kernel.dk>
Mon, 9 Jul 2018 15:07:53 +0000 (09:07 -0600)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c

index 7efe268..cb1e6cf 100644 (file)
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -622,6 +622,14 @@ static int hctx_active_show(void *data, struct seq_file *m)
         return 0;
  }
  
+static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
+{
+       struct blk_mq_hw_ctx *hctx = data; /* data is used as the hw queue context */
+
+       seq_printf(m, "%u\n", hctx->dispatch_busy); /* decimal value plus newline */
+       return 0;
+}
+
  static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
         __acquires(&ctx->lock)
  {
@@ -783,6 +791,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
         {"queued", 0600, hctx_queued_show, hctx_queued_write},
         {"run", 0600, hctx_run_show, hctx_run_write},
         {"active", 0400, hctx_active_show},
+       {"dispatch_busy", 0400, hctx_dispatch_busy_show},
         {},
  };
  
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c

index f3b4b5c..fdc129e 100644 (file)
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -206,15 +206,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
                 }
         } else if (has_sched_dispatch) {
                 blk_mq_do_dispatch_sched(hctx);
-       } else if (q->mq_ops->get_budget) {
-               /*
-                * If we need to get budget before queuing request, we
-                * dequeue request one by one from sw queue for avoiding
-                * to mess up I/O merge when dispatch runs out of resource.
-                *
-                * TODO: get more budgets, and dequeue more requests in
-                * one time.
-                */
+       } else if (hctx->dispatch_busy) {
+               /* dequeue request one by one from sw queue if queue is busy */
                 blk_mq_do_dispatch_ctx(hctx);
         } else {
                 blk_mq_flush_busy_ctxs(hctx, &rq_list);
diff --git a/block/blk-mq.c b/block/blk-mq.c

index 795ba85..850fdd0 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1074,6 +1074,35 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
         return true;
  }
  
+#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
+#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4
+/*
+ * Track dispatch busyness as an Exponential Weighted Moving Average (EWMA):
+ * - new = (old * 7 + (busy ? 1 << 4 : 0)) / 8, in unsigned integer math
+ * - the old value carries weight 7/8, so non-busy samples decay it to 0
+ * - a busy sample is scaled by 1 << 4 so that dividing by 8 does not
+ *   truncate it to 0; the exact scale is otherwise not significant
+ */
+static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
+{
+       unsigned int ewma;
+
+       if (hctx->queue->elevator) /* no tracking when an elevator is set */
+               return;
+
+       ewma = hctx->dispatch_busy;
+
+       if (!ewma && !busy) /* already 0 and not busy: nothing to change */
+               return;
+
+       ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
+       if (busy)
+               ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
+       ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
+
+       hctx->dispatch_busy = ewma;
+}
+
  #define BLK_MQ_RESOURCE_DELAY  3               /* ms units */
  
  /*
@@ -1210,8 +1239,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                 else if (needs_restart && (ret == BLK_STS_RESOURCE))
                         blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
  
+               blk_mq_update_dispatch_busy(hctx, true);
                 return false;
-       }
+       } else
+               blk_mq_update_dispatch_busy(hctx, false);
  
         /*
          * If the host/device is unable to accept more work, inform the
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h

index ea69025..d710e92 100644 (file)
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -35,9 +35,10 @@ struct blk_mq_hw_ctx {
         struct sbitmap          ctx_map;
  
         struct blk_mq_ctx       *dispatch_from;
+       unsigned int            dispatch_busy;
  
-       struct blk_mq_ctx       **ctxs;
         unsigned int            nr_ctx;
+       struct blk_mq_ctx       **ctxs;
  
         spinlock_t              dispatch_wait_lock;
         wait_queue_entry_t      dispatch_wait;
author	Ming Lei <ming.lei@redhat.com>
	Tue, 3 Jul 2018 15:03:16 +0000 (09:03 -0600)
committer	Jens Axboe <axboe@kernel.dk>
	Mon, 9 Jul 2018 15:07:53 +0000 (09:07 -0600)
block/blk-mq-debugfs.c		patch | blob | history
block/blk-mq-sched.c		patch | blob | history
block/blk-mq.c		patch | blob | history
include/linux/blk-mq.h		patch | blob | history