Re: [PATCH v3 3/9] btrfs: hunt down the BUG_ON()s inside... | linux-btrfs

Re: [PATCH v3 3/9] btrfs: hunt down the BUG_ON()s inside btrfs_submit_compressed_read()

From: David Sterba <hidden>
Date: 2021-06-16 14:06:22

On Tue, Jun 15, 2021 at 08:18:30PM +0800, Qu Wenruo wrote:

There are quite a few BUG_ON()s inside btrfs_submit_compressed_read();
namely, all errors inside the for() loop rely on BUG_ON() to handle
-ENOMEM.

Hunt down these BUG_ON()s properly by:

- Introduce compressed_bio::pending_bio_wait
  This allows us to wait for any submitted bio to finish, while still
  keeping the compressed_bio from being freed, as
  compressed_bio::io_sectors should still be non-zero.

- Introduce finish_compressed_bio_read() to finish the compressed_bio

- Properly end the bio and finish compressed_bio when error happens

Now in btrfs_submit_compressed_read() even when the bio submission
failed, we can properly handle the error without triggering BUG_ON().

Signed-off-by: Qu Wenruo <redacted>

Please change the subject to something like "btrfs: do proper error
handling in btrfs_submit_compressed_read", same for the other patch.

quoted hunk ↗ jump to hunk

---
 fs/btrfs/compression.c | 127 ++++++++++++++++++++++++++---------------
 fs/btrfs/compression.h |   3 +
 2 files changed, 85 insertions(+), 45 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bbfee9ffd20a..abbdb8d35001 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c

@@ -220,7 +220,6 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb,
 		cb->errors = 1;
 
 	ASSERT(bi_size && bi_size <= cb->compressed_len);
-	atomic_dec(&cb->pending_bios);
 
 	/*
 	 * Here we only need to check io_sectors, as if that is 0, we definily

@@ -232,9 +231,55 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb,
 	ASSERT(atomic_read(&cb->io_sectors) <
 	       (cb->compressed_len >> fs_info->sectorsize_bits));
 
+	/*
+	 * Here we must wake up pending_bio_wait after all other operations on
+	 * @cb finished, or we can race with finish_compressed_bio_*() in
+	 * error path.
+	 */
+	atomic_dec(&cb->pending_bios);
+	wake_up(&cb->pending_bio_wait);
+
 	return last_io;
 }
 
+static void finish_compressed_bio_read(struct compressed_bio *cb,
+				       struct bio *bio)
+{
+	unsigned int index;
+	struct page *page;
+
+	/* release the compressed pages */

Please fix/update comments in code that gets moved; here it's the
capitalization of the first letter

+	for (index = 0; index < cb->nr_pages; index++) {
+		page = cb->compressed_pages[index];
+		page->mapping = NULL;
+		put_page(page);
+	}
+
+	/* do io completion on the original bio */

	/* Do io ... */

quoted hunk ↗ jump to hunk

+	if (cb->errors) {
+		bio_io_error(cb->orig_bio);
+	} else {
+		struct bio_vec *bvec;
+		struct bvec_iter_all iter_all;
+
+		ASSERT(bio);
+		ASSERT(!bio->bi_status);
+		/*
+		 * we have verified the checksum already, set page
+		 * checked so the end_io handlers know about it
+		 */
+		ASSERT(!bio_flagged(bio, BIO_CLONED));
+		bio_for_each_segment_all(bvec, cb->orig_bio, iter_all)
+			SetPageChecked(bvec->bv_page);
+
+		bio_endio(cb->orig_bio);
+	}
+
+	/* finally free the cb struct */
+	kfree(cb->compressed_pages);
+	kfree(cb);
+}
+
 /* when we finish reading compressed pages from the disk, we
  * decompress them and then run the bio end_io routines on the
  * decompressed pages (in the inode address space).

@@ -249,8 +294,6 @@ static void end_compressed_bio_read(struct bio *bio)
 {
 	struct compressed_bio *cb = bio->bi_private;
 	struct inode *inode;
-	struct page *page;
-	unsigned int index;
 	unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
 	int ret = 0;

@@ -285,36 +328,7 @@ static void end_compressed_bio_read(struct bio *bio)
 csum_failed:
 	if (ret)
 		cb->errors = 1;
-
-	/* release the compressed pages */
-	index = 0;
-	for (index = 0; index < cb->nr_pages; index++) {
-		page = cb->compressed_pages[index];
-		page->mapping = NULL;
-		put_page(page);
-	}
-
-	/* do io completion on the original bio */
-	if (cb->errors) {
-		bio_io_error(cb->orig_bio);
-	} else {
-		struct bio_vec *bvec;
-		struct bvec_iter_all iter_all;
-
-		/*
-		 * we have verified the checksum already, set page
-		 * checked so the end_io handlers know about it
-		 */
-		ASSERT(!bio_flagged(bio, BIO_CLONED));
-		bio_for_each_segment_all(bvec, cb->orig_bio, iter_all)
-			SetPageChecked(bvec->bv_page);
-
-		bio_endio(cb->orig_bio);
-	}
-
-	/* finally free the cb struct */
-	kfree(cb->compressed_pages);
-	kfree(cb);
+	finish_compressed_bio_read(cb, bio);
 out:
 	bio_put(bio);
 }

@@ -440,6 +454,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 		return BLK_STS_RESOURCE;
 	atomic_set(&cb->pending_bios, 0);
 	atomic_set(&cb->io_sectors, compressed_len >> fs_info->sectorsize_bits);
+	init_waitqueue_head(&cb->pending_bio_wait);
 	cb->errors = 0;
 	cb->inode = &inode->vfs_inode;
 	cb->start = start;

@@ -723,6 +738,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	atomic_set(&cb->pending_bios, 0);
 	atomic_set(&cb->io_sectors, compressed_len >> fs_info->sectorsize_bits);
+	init_waitqueue_head(&cb->pending_bio_wait);
 	cb->errors = 0;
 	cb->inode = inode;
 	cb->mirror_num = mirror_num;

@@ -798,20 +814,20 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			atomic_inc(&cb->pending_bios);
 			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
 						  BTRFS_WQ_ENDIO_DATA);
-			BUG_ON(ret); /* -ENOMEM */
+			if (ret)
+				goto finish_cb;
 
 			ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
-			BUG_ON(ret); /* -ENOMEM */
+			if (ret)
+				goto finish_cb;
 
 			nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
 						  fs_info->sectorsize);
 			sums += fs_info->csum_size * nr_sectors;
 
 			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
-			if (ret) {
-				comp_bio->bi_status = ret;
-				bio_endio(comp_bio);
-			}
+			if (ret)
+				goto finish_cb;
 
 			comp_bio = btrfs_bio_alloc(cur_disk_byte);
 			comp_bio->bi_opf = REQ_OP_READ;

@@ -825,16 +841,16 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	atomic_inc(&cb->pending_bios);
 	ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
-	BUG_ON(ret); /* -ENOMEM */
+	if (ret)
+		goto last_bio;
 
 	ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
-	BUG_ON(ret); /* -ENOMEM */
+	if (ret)
+		goto last_bio;
 
 	ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
-	if (ret) {
-		comp_bio->bi_status = ret;
-		bio_endio(comp_bio);
-	}
+	if (ret)
+		goto last_bio;
 
 	return 0;

@@ -850,6 +866,27 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 out:
 	free_extent_map(em);
 	return ret;
+last_bio:
+	cb->errors = 1;
+	comp_bio->bi_status = ret;
+	/* This is the last bio, endio functions will free @cb */
+	bio_endio(comp_bio);
+	return ret;
+finish_cb:
+	cb->errors = 1;
+	if (comp_bio) {
+		comp_bio->bi_status = ret;
+		bio_endio(comp_bio);
+	}
+	/*
+	 * Even with previous bio ended, we should still have io not yet
+	 * submitted, thus need to finish @cb manually.
+	 */
+	ASSERT(atomic_read(&cb->io_sectors));
+	wait_event(cb->pending_bio_wait, atomic_read(&cb->pending_bios) == 0);
+	/* Now we are the only one referring @cb, can finish it safely. */
+	finish_compressed_bio_read(cb, NULL);
+	return ret;
 }
 
 /*

diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 41dd0bf6d5db..6f6c14f83c74 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h

@@ -39,6 +39,9 @@ struct compressed_bio {
 	 */
 	atomic_t io_sectors;
 
+	/* To wait for any submitted bio, used in error handling */
+	wait_queue_head_t pending_bio_wait;

This adds 24 bytes to the structure and it's only used for error
handling, so that does not seem justified enough.

There are system-wide wait queues, shared with other subsystems, and they
look like a better fit for the exceptional case of errors. See commit
6b2bb7265f0b62605 for more details; the change is otherwise trivial and
the API functions are wait_var_event(&variable, condition) and
wake_up_var(&variable), where the variable is a unique key, which here
would be the compressed_bio.

+
 	/* Number of compressed pages in the array */
 	unsigned int nr_pages;
 
-- 
2.32.0

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help