@@ -1221,6 +1221,8 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, vec,
unsigned long, vlen, unsigned int, flags)
{
+ struct pipe_inode_info *pipe;
+
if (unlikely(flags & ~SPLICE_F_ALL))
return -EINVAL;
@@ -1229,11 +1231,44 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, vec,
return -EBADF;
/* We do vfs_writev/vfs_readv, so it is okay to pass "false" here */
- if (!get_pipe_info(fd_file(f), /* for_splice = */ false))
+ pipe = get_pipe_info(fd_file(f), /* for_splice = */ false);
+
+ if (!pipe)
return -EBADF;
if (fd_file(f)->f_mode & FMODE_WRITE) {
- ssize_t ret = vfs_writev(fd_file(f), vec, vlen, NULL, (flags & SPLICE_F_NONBLOCK) ? RWF_NOWAIT : 0);
+ /*
+ * When writing to the pipe, the previous implementation of
+ * vmsplice first waited for space to appear in the pipe
+ * (depending on whether SPLICE_F_NONBLOCK was passed), and
+ * then did an unconditional non-blocking write to the pipe.
+ *
+ * This differs from what pwritev2 does.
+ *
+ * For compatibility we do the same thing the previous
+ * implementation did.
+ *
+ * We lock the pipe, call pipe_wait_for_space(), unlock the
+ * pipe, and then call vfs_writev(), which locks the pipe
+ * again internally. This leaves a TOCTOU window: the pipe
+ * may be full again by the time vfs_writev() runs. So in
+ * blocking mode we retry in a loop while it returns -EAGAIN.
+ */
+
+ bool non_block = (flags & SPLICE_F_NONBLOCK) || (fd_file(f)->f_flags & O_NONBLOCK);
+ ssize_t ret;
+
+ do {
+ pipe_lock(pipe);
+ ret = pipe_wait_for_space(pipe, non_block);
+ pipe_unlock(pipe);
+
+ if (ret < 0)
+ break;
+
+ ret = vfs_writev(fd_file(f), vec, vlen, NULL, RWF_NOWAIT);
+ } while (!non_block && ret == -EAGAIN);
+
if (ret > 0)
add_wchar(current, ret);
inc_syscw(current);