Thread (46 messages) 46 messages, 7 authors, 2021-09-13

Re: [git pull] iov_iter fixes

From: Jens Axboe <axboe@kernel.dk>
Date: 2021-09-10 16:10:31
Also in: lkml
Subsystem: filesystems (vfs and infrastructure), the rest, userspace copyin/copyout (uiovec) · Maintainers: Alexander Viro, Christian Brauner, Linus Torvalds

On 9/10/21 9:04 AM, Jens Axboe wrote:
We could pretty this up and have the state part be explicit in iov_iter,
and have the store/restore parts end up in uio.h. That'd tie them closer
together, though I don't expect iov_iter changes to be an issue. It
would make it more maintainable, though. I'll try and hack up this
generic solution, see if that looks any better.
Looks something like this. Not super pretty in terms of needing a define
for this, and maybe I'm missing something, but ideally we'd want it as
an anonymous struct that's defined inside iov_iter. Anyway, gets the
point across. Alternatively, since we're down to just a few members now,
we just duplicate them in each struct...

Would be split into two patches, one for the iov_state addition and
the save/restore helpers, and then one switching io_uring to use them.
Figured we'd need some agreement on this first...

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 855ea544807f..539c94299d64 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2608,8 +2608,6 @@ static bool io_resubmit_prep(struct io_kiocb *req)
 
 	if (!rw)
 		return !io_req_prep_async(req);
-	/* may have left rw->iter inconsistent on -EIOCBQUEUED */
-	iov_iter_revert(&rw->iter, req->result - iov_iter_count(&rw->iter));
 	return true;
 }
 
@@ -3431,14 +3429,23 @@ static bool need_read_all(struct io_kiocb *req)
 		S_ISBLK(file_inode(req->file)->i_mode);
 }
 
+static void io_iter_restore(struct iov_iter *iter, struct iov_iter_state *state,
+			    ssize_t did_bytes)
+{
+	iov_iter_restore_state(iter, state);
+	if (did_bytes > 0)
+		iov_iter_advance(iter, did_bytes);
+}
+
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw.kiocb;
 	struct iov_iter __iter, *iter = &__iter;
 	struct io_async_rw *rw = req->async_data;
-	ssize_t io_size, ret, ret2;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+	struct iov_iter_state state;
+	ssize_t ret, ret2;
 
 	if (rw) {
 		iter = &rw->iter;
@@ -3448,8 +3455,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		if (ret < 0)
 			return ret;
 	}
-	io_size = iov_iter_count(iter);
-	req->result = io_size;
+	req->result = iov_iter_count(iter);
+	iov_iter_save_state(iter, &state);
 
 	/* Ensure we clear previously set non-block flag */
 	if (!force_nonblock)
@@ -3463,7 +3470,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		return ret ?: -EAGAIN;
 	}
 
-	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
+	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), state.count);
 	if (unlikely(ret)) {
 		kfree(iovec);
 		return ret;
@@ -3479,18 +3486,17 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		/* no retry on NONBLOCK nor RWF_NOWAIT */
 		if (req->flags & REQ_F_NOWAIT)
 			goto done;
-		/* some cases will consume bytes even on error returns */
-		iov_iter_reexpand(iter, iter->count + iter->truncated);
-		iov_iter_revert(iter, io_size - iov_iter_count(iter));
 		ret = 0;
 	} else if (ret == -EIOCBQUEUED) {
 		goto out_free;
-	} else if (ret <= 0 || ret == io_size || !force_nonblock ||
+	} else if (ret <= 0 || ret == state.count || !force_nonblock ||
 		   (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
 		/* read all, failed, already did sync or don't want to retry */
 		goto done;
 	}
 
+	io_iter_restore(iter, &state, ret);
+
 	ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
 	if (ret2)
 		return ret2;
@@ -3501,7 +3507,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	iter = &rw->iter;
 
 	do {
-		io_size -= ret;
+		state.count -= ret;
 		rw->bytes_done += ret;
 		/* if we can retry, do so with the callbacks armed */
 		if (!io_rw_should_retry(req)) {
@@ -3520,7 +3526,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 			return 0;
 		/* we got some bytes, but not all. retry. */
 		kiocb->ki_flags &= ~IOCB_WAITQ;
-	} while (ret > 0 && ret < io_size);
+	} while (ret > 0 && ret < state.count);
 done:
 	kiocb_done(kiocb, ret, issue_flags);
 out_free:
@@ -3543,8 +3549,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	struct kiocb *kiocb = &req->rw.kiocb;
 	struct iov_iter __iter, *iter = &__iter;
 	struct io_async_rw *rw = req->async_data;
-	ssize_t ret, ret2, io_size;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+	struct iov_iter_state state;
+	ssize_t ret, ret2;
 
 	if (rw) {
 		iter = &rw->iter;
@@ -3554,8 +3561,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		if (ret < 0)
 			return ret;
 	}
-	io_size = iov_iter_count(iter);
-	req->result = io_size;
+	req->result = iov_iter_count(iter);
+	iov_iter_save_state(iter, &state);
+	ret2 = 0;
 
 	/* Ensure we clear previously set non-block flag */
 	if (!force_nonblock)
@@ -3572,7 +3580,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	    (req->flags & REQ_F_ISREG))
 		goto copy_iov;
 
-	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size);
+	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), state.count);
 	if (unlikely(ret))
 		goto out_free;
 
@@ -3619,9 +3627,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		kiocb_done(kiocb, ret2, issue_flags);
 	} else {
 copy_iov:
-		/* some cases will consume bytes even on error returns */
-		iov_iter_reexpand(iter, iter->count + iter->truncated);
-		iov_iter_revert(iter, io_size - iov_iter_count(iter));
+		io_iter_restore(iter, &state, ret2);
 		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
 		return ret ?: -EAGAIN;
 	}
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 5265024e8b90..4f9d483096cd 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -27,11 +27,25 @@ enum iter_type {
 	ITER_DISCARD,
 };
 
+#define IOV_ITER_STATE					\
+	size_t iov_offset;				\
+	size_t count;					\
+	union {						\
+		unsigned long nr_segs;			\
+		struct {				\
+			unsigned int head;		\
+			unsigned int start_head;	\
+		};					\
+		loff_t xarray_start;			\
+	};						\
+
+struct iov_iter_state {
+	IOV_ITER_STATE;
+};
+
 struct iov_iter {
 	u8 iter_type;
 	bool data_source;
-	size_t iov_offset;
-	size_t count;
 	union {
 		const struct iovec *iov;
 		const struct kvec *kvec;
@@ -40,12 +54,10 @@ struct iov_iter {
 		struct pipe_inode_info *pipe;
 	};
 	union {
-		unsigned long nr_segs;
+		struct iov_iter_state state;
 		struct {
-			unsigned int head;
-			unsigned int start_head;
+			IOV_ITER_STATE;
 		};
-		loff_t xarray_start;
 	};
 	size_t truncated;
 };
@@ -55,6 +67,33 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i)
 	return i->iter_type;
 }
 
+static inline void iov_iter_save_state(struct iov_iter *iter,
+				       struct iov_iter_state *state)
+{
+	*state = iter->state;
+}
+
+static inline void iov_iter_restore_state(struct iov_iter *iter,
+					  struct iov_iter_state *state)
+{
+	iter->iov_offset = state->iov_offset;
+	iter->count = state->count;
+
+	switch (iov_iter_type(iter)) {
+	case ITER_IOVEC:
+	case ITER_KVEC:
+	case ITER_BVEC:
+		iter->iov -= state->nr_segs - iter->nr_segs;
+		fallthrough;
+	case ITER_PIPE:
+	case ITER_XARRAY:
+		iter->nr_segs = state->nr_segs;
+		break;
+	case ITER_DISCARD:
+		break;
+	}
+}
+
 static inline bool iter_is_iovec(const struct iov_iter *i)
 {
 	return iov_iter_type(i) == ITER_IOVEC;
-- 
Jens Axboe
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help