Thread (32 messages) 32 messages, 2 authors, 2021-07-20

Re: [PATCH 07/27] btrfs: add subpage checked_bitmap to make PageChecked flag to be subpage compatible

From: Qu Wenruo <hidden>
Date: 2021-07-16 07:54:49


On 2021/7/13 下午2:14, Qu Wenruo wrote:
quoted hunk ↗ jump to hunk
Although in btrfs we have very limited user of PageChecked flag, it's
still some page flag not yet subpage compatible.

Fix it by introducing btrfs_subpage::checked_bitmap to do the covert.

For most call sites, especially for free-space cache, COW fixup and
btrfs_invalidatepage(), they all work in full page mode anyway.

For other call sites, they work as fully subpage mode.

Some call sites need extra modification:

- btrfs_drop_pages()
   Needs extra parameter to get the real range we need to clear checked
   flag.

- btrfs_invalidatepage()
   We need to call subpage helper before calling __btrfs_releasepage(),
   or it will trigger ASSERT() as page->private will be cleared.

- btrfs_verify_data_csum()
   In theory we don't need the io_bio->csum check anymore, but it's
   won't hurt.
   Just change the comment.

Signed-off-by: Qu Wenruo <redacted>
---
  fs/btrfs/compression.c      | 11 +++++++++--
  fs/btrfs/file.c             | 20 +++++++++++++++-----
  fs/btrfs/free-space-cache.c |  6 +++++-
  fs/btrfs/inode.c            | 30 ++++++++++++++----------------
  fs/btrfs/reflink.c          |  2 +-
  fs/btrfs/subpage.c          | 31 +++++++++++++++++++++++++++++++
  fs/btrfs/subpage.h          |  8 ++++++++
  7 files changed, 83 insertions(+), 25 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f08522e8b4cd..be81e34fbd56 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -29,6 +29,7 @@
  #include "extent_io.h"
  #include "extent_map.h"
  #include "zoned.h"
+#include "subpage.h"
  
  static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
  
@@ -296,8 +297,14 @@ static void end_compressed_bio_read(struct bio *bio)
  		 * checked so the end_io handlers know about it
  		 */
  		ASSERT(!bio_flagged(bio, BIO_CLONED));
-		bio_for_each_segment_all(bvec, cb->orig_bio, iter_all)
-			SetPageChecked(bvec->bv_page);
+		bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
+			u64 bvec_start = page_offset(bvec->bv_page) +
+					 bvec->bv_offset;
+
+			btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
+					bvec->bv_page, bvec_start,
+					bvec->bv_len);
+		}
  
  		bio_endio(cb->orig_bio);
  	}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0831ca08376f..ccbbf2732685 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -437,9 +437,16 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
  /*
   * unlocks pages after btrfs_file_write is done with them
   */
-static void btrfs_drop_pages(struct page **pages, size_t num_pages)
+static void btrfs_drop_pages(struct btrfs_fs_info *fs_info,
+			     struct page **pages, size_t num_pages,
+			     u64 pos, u64 copied)
  {
  	size_t i;
+	u64 block_start = round_down(pos, fs_info->sectorsize);
+	u64 block_len = round_up(pos + copied, fs_info->sectorsize) -
+			block_start;
+
+	ASSERT(block_len <= U32_MAX);
  	for (i = 0; i < num_pages; i++) {
  		/* page checked is some magic around finding pages that
  		 * have been modified without going through btrfs_set_page_dirty
@@ -447,7 +454,8 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
  		 * accessed as prepare_pages should have marked them accessed
  		 * in prepare_pages via find_or_create_page()
  		 */
-		ClearPageChecked(pages[i]);
+		btrfs_page_clamp_clear_checked(fs_info, pages[i], block_start,
+					       block_len);
Currently btrfs_subpage_clamp() can't handle any page whose 
page_offset() is beyond @block_start + @block_len.

This leads to rare crash in generic/269 and generic/102.

Will also update btrfs_subpage_clamp() to handle such case in next update.

Thanks,
Q
quoted hunk ↗ jump to hunk
  		unlock_page(pages[i]);
  		put_page(pages[i]);
  	}
@@ -504,7 +512,8 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
  		struct page *p = pages[i];
  
  		btrfs_page_clamp_set_uptodate(fs_info, p, start_pos, num_bytes);
-		ClearPageChecked(p);
+		btrfs_page_clamp_clear_checked(fs_info, p, start_pos,
+					       num_bytes);
  		btrfs_page_clamp_set_dirty(fs_info, p, start_pos, num_bytes);
  	}
  
@@ -1845,7 +1854,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
  
  		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
  		if (ret) {
-			btrfs_drop_pages(pages, num_pages);
+			btrfs_drop_pages(fs_info, pages, num_pages, pos,
+					 copied);
  			break;
  		}
  
@@ -1853,7 +1863,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
  		if (only_release_metadata)
  			btrfs_check_nocow_unlock(BTRFS_I(inode));
  
-		btrfs_drop_pages(pages, num_pages);
+		btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);
  
  		cond_resched();
  
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 2131ae5b9ed7..f0a84fe7dc80 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -22,6 +22,7 @@
  #include "delalloc-space.h"
  #include "block-group.h"
  #include "discard.h"
+#include "subpage.h"
  
  #define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
  #define MAX_CACHE_BYTES_PER_GIG	SZ_64K
@@ -417,7 +418,10 @@ static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
  
  	for (i = 0; i < io_ctl->num_pages; i++) {
  		if (io_ctl->pages[i]) {
-			ClearPageChecked(io_ctl->pages[i]);
+			btrfs_page_clear_checked(io_ctl->fs_info,
+					io_ctl->pages[i],
+					page_offset(io_ctl->pages[i]),
+					PAGE_SIZE);
  			unlock_page(io_ctl->pages[i]);
  			put_page(io_ctl->pages[i]);
  		}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b524deadb5c6..5c29d131a574 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2757,7 +2757,8 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
  		clear_page_dirty_for_io(page);
  		SetPageError(page);
  	}
-	ClearPageChecked(page);
+	btrfs_page_clear_checked(inode->root->fs_info, page,
+				 page_start, PAGE_SIZE);
  	unlock_page(page);
  	put_page(page);
  	kfree(fixup);
@@ -2812,7 +2813,7 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
  	 * page->mapping outside of the page lock.
  	 */
  	ihold(inode);
-	SetPageChecked(page);
+	btrfs_page_set_checked(fs_info, page, page_offset(page), PAGE_SIZE);
  	get_page(page);
  	btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
  	fixup->page = page;
@@ -3257,27 +3258,23 @@ unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
  				    struct page *page, u64 start, u64 end)
  {
  	struct inode *inode = page->mapping->host;
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
  	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
  	struct btrfs_root *root = BTRFS_I(inode)->root;
  	const u32 sectorsize = root->fs_info->sectorsize;
  	u32 pg_off;
  	unsigned int result = 0;
  
-	if (PageChecked(page)) {
-		ClearPageChecked(page);
+	if (btrfs_page_test_checked(fs_info, page, start, end + 1 - start)) {
+		btrfs_page_clear_checked(fs_info, page, start, end + 1 - start);
  		return 0;
  	}
  
  	/*
-	 * For subpage case, above PageChecked is not safe as it's not subpage
-	 * compatible.
-	 * But for now only cow fixup and compressed read utilize PageChecked
-	 * flag, while in this context we can easily use io_bio->csum to
-	 * determine if we really need to do csum verification.
-	 *
-	 * So for now, just exit if io_bio->csum is NULL, as it means it's
-	 * compressed read, and its compressed data csum has already been
-	 * verified.
+	 * This only happens for NODATASUM or compressed read.
+	 * Normally this should be covered by above check for compressed read
+	 * or the next check for NODATASUM.
+	 * Just do a quicker exit here.
  	 */
  	if (io_bio->csum == NULL)
  		return 0;
@@ -5083,7 +5080,8 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
  				     len);
  		flush_dcache_page(page);
  	}
-	ClearPageChecked(page);
+	btrfs_page_clear_checked(fs_info, page, block_start,
+				 block_end + 1 - block_start);
  	btrfs_page_set_dirty(fs_info, page, block_start, block_end + 1 - block_start);
  	unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
  
@@ -8673,9 +8671,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
  	 * did something wrong.
  	 */
  	ASSERT(!PageOrdered(page));
+	btrfs_page_clear_checked(fs_info, page, page_offset(page), PAGE_SIZE);
  	if (!inode_evicting)
  		__btrfs_releasepage(page, GFP_NOFS);
-	ClearPageChecked(page);
  	clear_page_extent_mapped(page);
  }
  
@@ -8819,7 +8817,7 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
  		memzero_page(page, zero_start, PAGE_SIZE - zero_start);
  		flush_dcache_page(page);
  	}
-	ClearPageChecked(page);
+	btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE);
  	btrfs_page_set_dirty(fs_info, page, page_start, end + 1 - page_start);
  	btrfs_page_set_uptodate(fs_info, page, page_start, end + 1 - page_start);
  
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 9b0814318e72..a7de8cfdcac0 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -138,7 +138,7 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
  	}
  
  	btrfs_page_set_uptodate(fs_info, page, file_offset, block_size);
-	ClearPageChecked(page);
+	btrfs_page_clear_checked(fs_info, page, file_offset, block_size);
  	btrfs_page_set_dirty(fs_info, page, file_offset, block_size);
  out_unlock:
  	if (page) {
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index a61aa33aeeee..b8a420cb1683 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -468,6 +468,34 @@ void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
  		ClearPageOrdered(page);
  	spin_unlock_irqrestore(&subpage->lock, flags);
  }
+
+void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len)
+{
+	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned long flags;
+
+	spin_lock_irqsave(&subpage->lock, flags);
+	subpage->checked_bitmap |= tmp;
+	if (subpage->checked_bitmap == U16_MAX)
+		SetPageChecked(page);
+	spin_unlock_irqrestore(&subpage->lock, flags);
+}
+
+void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len)
+{
+	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned long flags;
+
+	spin_lock_irqsave(&subpage->lock, flags);
+	subpage->checked_bitmap &= ~tmp;
+	ClearPageChecked(page);
+	spin_unlock_irqrestore(&subpage->lock, flags);
+}
+
  /*
   * Unlike set/clear which is dependent on each page status, for test all bits
   * are tested in the same way.
@@ -491,6 +519,7 @@ IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error);
  IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
  IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
  IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
+IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);
  
  /*
   * Note that, in selftests (extent-io-tests), we can have empty fs_info passed
@@ -561,6 +590,8 @@ IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
  			 PageWriteback);
  IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered,
  			 PageOrdered);
+IMPLEMENT_BTRFS_PAGE_OPS(checked, SetPageChecked, ClearPageChecked,
+			 PageChecked);
  
  void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
  				 struct page *page)
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 9aa40d795ba9..6fb54b22b295 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -44,6 +44,13 @@ struct btrfs_subpage {
  
  			/* Tracke pending ordered extent in this sector */
  			u16 ordered_bitmap;
+
+			/*
+			 * If the sector is already checked, thus no need to go
+			 * through csum verification.
+			 * Used by both compression and cow fixup.
+			 */
+			u16 checked_bitmap;
  		};
  	};
  };
@@ -122,6 +129,7 @@ DECLARE_BTRFS_SUBPAGE_OPS(error);
  DECLARE_BTRFS_SUBPAGE_OPS(dirty);
  DECLARE_BTRFS_SUBPAGE_OPS(writeback);
  DECLARE_BTRFS_SUBPAGE_OPS(ordered);
+DECLARE_BTRFS_SUBPAGE_OPS(checked);
  
  bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
  		struct page *page, u64 start, u32 len);
  
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help