Re: [PATCH V6 6/6] blk-mq: support batching dispatch in case of io scheduler

From: Ming Lei <hidden>
Date: 2020-06-29 18:36:28

On Mon, Jun 29, 2020 at 10:04:52AM +0100, Christoph Hellwig wrote:

quoted

+/*
+ * We know bfq and deadline apply single scheduler queue instead of multi
+ * queue. However, the two are often used on single queue devices, also
+ * the current @hctx should affect the real device status most of times
+ * because of locality principle.
+ *
+ * So use current hctx->dispatch_busy directly for figuring out batching
+ * dispatch count.
+ */

I don't really understand this comment.  Also I think the code might
be cleaner if this function is inlined as an if/else in the only
caller.

quoted

+static inline bool blk_mq_do_dispatch_rq_lists(struct blk_mq_hw_ctx *hctx,
+		struct list_head *lists, bool multi_hctxs, unsigned count)
+{
+	bool ret;
+
+	if (!count)
+		return false;
+
+	if (likely(!multi_hctxs))
+		return blk_mq_dispatch_rq_list(hctx, lists, count);

Keeping these checks in the callers would keep things a little cleaner,
especially as the multi hctx case only really needs the lists argument.

quoted

+		LIST_HEAD(list);
+		struct request *new, *rq = list_first_entry(lists,
+				struct request, queuelist);
+		unsigned cnt = 0;
+
+		list_for_each_entry(new, lists, queuelist) {
+			if (new->mq_hctx != rq->mq_hctx)
+				break;
+			cnt++;
+		}
+
+		if (new->mq_hctx == rq->mq_hctx)
+			list_splice_tail_init(lists, &list);
+		else
+			list_cut_before(&list, lists, &new->queuelist);
+
+		ret = blk_mq_dispatch_rq_list(rq->mq_hctx, &list, cnt);
+	}

I think this has two issues:  for one ret should be ORed as any dispatch
or error should leaave ret set.  Also we need to splice the dispatch

OK.

list back onto the main list here, otherwise we can lose those requests.

The dispatch list always becomes empty after blk_mq_dispatch_rq_list()
returns, so no need to splice it back.

quoted hunk ↗ jump to hunk

FYI, while reviewing this I ended up editing things into a shape I
could better understand.  Let me know what you think of this version?

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 4c72073830f3cb..466dce99699ae4 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c

@@ -7,6 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/blk-mq.h>
+#include <linux/list_sort.h>
 
 #include <trace/events/block.h>

@@ -80,6 +81,38 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 	blk_mq_run_hw_queue(hctx, true);
 }
 
+static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct request *rqa = container_of(a, struct request, queuelist);
+	struct request *rqb = container_of(b, struct request, queuelist);
+
+	return rqa->mq_hctx > rqb->mq_hctx;
+}
+
+static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
+{
+	struct blk_mq_hw_ctx *hctx =
+		list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
+	struct request *rq;
+	LIST_HEAD(hctx_list);
+	unsigned int count = 0;
+	bool ret;
+
+	list_for_each_entry(rq, rq_list, queuelist) {
+		if (rq->mq_hctx != hctx) {
+			list_cut_before(&hctx_list, rq_list, &rq->queuelist);
+			goto dispatch;
+		}
+		count++;
+	}
+	list_splice_tail_init(rq_list, &hctx_list);
+
+dispatch:
+	ret = blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
+	list_splice(&hctx_list, rq_list);

The above line isn't needed.

quoted hunk ↗ jump to hunk

+	return ret;
+}
+
 #define BLK_MQ_BUDGET_DELAY	3		/* ms units */
 
 /*

@@ -90,20 +123,29 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
  * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
  * be run again.  This is necessary to avoid starving flushes.
  */
-static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)

The return type can be changed to 'bool'.

quoted hunk ↗ jump to hunk

 {
 	struct request_queue *q = hctx->queue;
 	struct elevator_queue *e = q->elevator;
+	bool multi_hctxs = false, run_queue = false;
+	bool dispatched = false, busy = false;
+	unsigned int max_dispatch;
 	LIST_HEAD(rq_list);
-	int ret = 0;
-	struct request *rq;
+	int count = 0;
+
+	if (hctx->dispatch_busy)
+		max_dispatch = 1;
+	else
+		max_dispatch = hctx->queue->nr_requests;
 
 	do {
+		struct request *rq;
+
 		if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
 			break;
 
 		if (!list_empty_careful(&hctx->dispatch)) {
-			ret = -EAGAIN;
+			busy = true;
 			break;
 		}

@@ -120,7 +162,7 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 			 * no guarantee anyone will kick the queue.  Kick it
 			 * ourselves.
 			 */
-			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
+			run_queue = true;
 			break;
 		}
 
@@ -130,7 +172,43 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 		 * in blk_mq_dispatch_rq_list().
 		 */
 		list_add(&rq->queuelist, &rq_list);

The above should change to list_add_tail(&rq->queuelist, &rq_list).


Thanks, 
Ming

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help