Re: [PATCH v2 0/2] Optimise io_uring completion waiting
From: Jens Axboe <axboe@kernel.dk>
Date: 2019-09-24 13:13:44
Also in:
lkml
Subsystem:
filesystems (vfs and infrastructure), the rest · Maintainers:
Alexander Viro, Christian Brauner, Linus Torvalds
On 9/24/19 5:23 AM, Pavel Begunkov wrote:
quoted
Yep that should do it, and saves 8 bytes of stack as well. BTW, did you test my patch, this one or the previous? Just curious if it worked for you.Not yet, going to do that tonight
Thanks! For reference, the final version is below. There was still a signal mishap in there, now it should all be correct afaict.
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9b84232e5cc4..d2a86164d520 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c@@ -2768,6 +2768,38 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit, return submit; } +struct io_wait_queue { + struct wait_queue_entry wq; + struct io_ring_ctx *ctx; + unsigned to_wait; + unsigned nr_timeouts; +}; + +static inline bool io_should_wake(struct io_wait_queue *iowq) +{ + struct io_ring_ctx *ctx = iowq->ctx; + + /* + * Wake up if we have enough events, or if a timeout occured since we + * started waiting. For timeouts, we always want to return to userspace, + * regardless of event count. + */ + return io_cqring_events(ctx->rings) >= iowq->to_wait || + atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; +} + +static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, + int wake_flags, void *key) +{ + struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, + wq); + + if (!io_should_wake(iowq)) + return -1; + + return autoremove_wake_function(curr, mode, wake_flags, key); +} + /* * Wait until events become available, if we don't already have some. The * application must reap them itself, as they reside on the shared cq ring.
@@ -2775,8 +2807,16 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit, static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, const sigset_t __user *sig, size_t sigsz) { + struct io_wait_queue iowq = { + .wq = { + .private = current, + .func = io_wake_function, + .entry = LIST_HEAD_INIT(iowq.wq.entry), + }, + .ctx = ctx, + .to_wait = min_events, + }; struct io_rings *rings = ctx->rings; - unsigned nr_timeouts; int ret; if (io_cqring_events(rings) >= min_events)
@@ -2795,15 +2835,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } - nr_timeouts = atomic_read(&ctx->cq_timeouts); - /* - * Return if we have enough events, or if a timeout occured since - * we started waiting. For timeouts, we always want to return to - * userspace. - */ - ret = wait_event_interruptible(ctx->wait, - io_cqring_events(rings) >= min_events || - atomic_read(&ctx->cq_timeouts) != nr_timeouts); + iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); + do { + prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, + TASK_INTERRUPTIBLE); + if (io_should_wake(&iowq)) + break; + schedule(); + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + } while (1); + finish_wait(&ctx->wait, &iowq.wq); + restore_saved_sigmask_unless(ret == -ERESTARTSYS); if (ret == -ERESTARTSYS) ret = -EINTR;
--
Jens Axboe