[PATCH 08/12] ext4: hold i_rwsem until AIO completes
From: Christoph Hellwig <hch@lst.de>
Date: 2020-01-14 16:13:17
Also in:
linux-fsdevel, linux-mm, linux-xfs, lkml
Subsystem:
ext4 file system, filesystems (vfs and infrastructure), the rest · Maintainers:
"Theodore Ts'o", Alexander Viro, Christian Brauner, Linus Torvalds
Switch ext4 from the magic i_dio_count scheme to just holding i_rwsem until
the actual I/O has completed, to reduce the locking complexity and avoid
nasty bugs due to missing inode_dio_wait calls.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/ext4/extents.c     | 12 ------------
 fs/ext4/file.c        | 21 +++++++++++++--------
 fs/ext4/inode.c       | 11 -----------
 fs/ext4/ioctl.c       |  5 -----
 fs/ext4/move_extent.c |  4 ----
 5 files changed, 13 insertions(+), 40 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0e8708b77da6..b6aa2d249b30 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c@@ -4777,9 +4777,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; - /* Wait all existing dio workers, newcomers will block on i_mutex */ - inode_dio_wait(inode); - /* Preallocate the range including the unaligned edges */ if (partial_begin || partial_end) { ret = ext4_alloc_file_blocks(file,
@@ -4949,9 +4946,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto out; } - /* Wait all existing dio workers, newcomers will block on i_mutex */ - inode_dio_wait(inode); - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); if (ret) goto out;
@@ -5525,9 +5519,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - /* Wait for existing dio to complete */ - inode_dio_wait(inode); - /* * Prevent page faults from reinstantiating pages we have released from * page cache.
@@ -5678,9 +5669,6 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - /* Wait for existing dio to complete */ - inode_dio_wait(inode); - /* * Prevent page faults from reinstantiating pages we have released from * page cache.
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 08b603d0c638..b3410a3ede27 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c@@ -74,9 +74,10 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) return generic_file_read_iter(iocb, to); } - ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0); - inode_unlock_shared(inode); - + ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, + IOMAP_DIO_RWSEM_SHARED); + if (ret != -EIOCBQUEUED) + inode_unlock_shared(inode); file_accessed(iocb->ki_filp); return ret; }
@@ -405,7 +406,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) !is_sync_kiocb(iocb) && ext4_unaligned_aio(inode, from, offset)) { unaligned_aio = true; dio_flags |= IOMAP_DIO_SYNCHRONOUS; - inode_dio_wait(inode); } /*
@@ -416,7 +416,10 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) && ext4_should_dioread_nolock(inode)) { overwrite = true; + dio_flags |= IOMAP_DIO_RWSEM_SHARED; downgrade_write(&inode->i_rwsem); + } else { + dio_flags |= IOMAP_DIO_RWSEM_EXCL; } if (offset + count > EXT4_I(inode)->i_disksize) {
@@ -444,10 +447,12 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = ext4_handle_inode_extension(inode, offset, ret, count); out: - if (overwrite) - inode_unlock_shared(inode); - else - inode_unlock(inode); + if (ret != -EIOCBQUEUED) { + if (overwrite) + inode_unlock_shared(inode); + else + inode_unlock(inode); + } if (ret >= 0 && iov_iter_count(from)) { ssize_t err;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 629a25d999f0..e2dac0727ab0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c@@ -3965,9 +3965,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } - /* Wait all existing dio workers, newcomers will block on i_mutex */ - inode_dio_wait(inode); - /* * Prevent page faults from reinstantiating pages we have released from * page cache.
@@ -5263,11 +5260,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) goto err_out; } - /* - * Blocks are going to be removed from the inode. Wait - * for dio in flight. - */ - inode_dio_wait(inode); } down_write(&EXT4_I(inode)->i_mmap_sem);
@@ -5798,9 +5790,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) if (is_journal_aborted(journal)) return -EROFS; - /* Wait for all existing dio workers */ - inode_dio_wait(inode); - /* * Before flushing the journal and switching inode's aops, we have * to flush all dirty data the inode has. There can be outstanding
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e8870fff8224..99d21d81074f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c@@ -153,10 +153,6 @@ static long swap_inode_boot_loader(struct super_block *sb, if (err) goto err_out; - /* Wait for all existing dio workers */ - inode_dio_wait(inode); - inode_dio_wait(inode_bl); - truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode_bl->i_data, 0);
@@ -364,7 +360,6 @@ static int ext4_ioctl_setflags(struct inode *inode, */ if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) && (flags & EXT4_IMMUTABLE_FL)) { - inode_dio_wait(inode); err = filemap_write_and_wait(inode->i_mapping); if (err) goto flags_out;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 30ce3dc69378..20240808569f 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c@@ -602,10 +602,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, /* Protect orig and donor inodes against a truncate */ lock_two_nondirectories(orig_inode, donor_inode); - /* Wait for all existing dio workers */ - inode_dio_wait(orig_inode); - inode_dio_wait(donor_inode); - /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */
--
2.24.1