Re: [PATCH v3 3/9] btrfs: hunt down the BUG_ON()s inside btrfs_submit_compressed_read()
From: David Sterba <hidden>
Date: 2021-06-16 14:06:22
On Tue, Jun 15, 2021 at 08:18:30PM +0800, Qu Wenruo wrote:
There are quite some BUG_ON()s inside btrfs_submit_compressed_read(), namingly all errors inside the for() loop relies on BUG_ON() to handle -ENOMEM. Hunt down these BUG_ON()s properly by: - Introduce compressed_bio::pending_bios_wait This allows us to wait for any submitted bio to finish, while still keeps the compressed_bio from being freed, as we should have compressed_bio::io_sectors not zero. - Introduce finish_compressed_bio_read() to finish the compressed_bio - Properly end the bio and finish compressed_bio when error happens Now in btrfs_submit_compressed_read() even when the bio submission failed, we can properly handle the error without triggering BUG_ON(). Signed-off-by: Qu Wenruo <redacted>
Please change the subject to something like "btrfs: do proper error handling in btrfs_submit_compressed_read", same for the other patch.
quoted hunk ↗ jump to hunk
--- fs/btrfs/compression.c | 127 ++++++++++++++++++++++++++--------------- fs/btrfs/compression.h | 3 + 2 files changed, 85 insertions(+), 45 deletions(-)diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index bbfee9ffd20a..abbdb8d35001 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c@@ -220,7 +220,6 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, cb->errors = 1; ASSERT(bi_size && bi_size <= cb->compressed_len); - atomic_dec(&cb->pending_bios); /* * Here we only need to check io_sectors, as if that is 0, we definily@@ -232,9 +231,55 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, ASSERT(atomic_read(&cb->io_sectors) < (cb->compressed_len >> fs_info->sectorsize_bits)); + /* + * Here we must wake up pending_bio_wait after all other operations on + * @cb finished, or we can race with finish_compressed_bio_*() in + * error path. + */ + atomic_dec(&cb->pending_bios); + wake_up(&cb->pending_bio_wait); + return last_io; } +static void finish_compressed_bio_read(struct compressed_bio *cb, + struct bio *bio) +{ + unsigned int index; + struct page *page; + + /* release the compressed pages */
Please fix/update comments in code that gets moved, here it's the uppercase
+ for (index = 0; index < cb->nr_pages; index++) {
+ page = cb->compressed_pages[index];
+ page->mapping = NULL;
+ put_page(page);
+ }
+
+ /* do io completion on the original bio *//* Do io ... */
quoted hunk ↗ jump to hunk
+ if (cb->errors) { + bio_io_error(cb->orig_bio); + } else { + struct bio_vec *bvec; + struct bvec_iter_all iter_all; + + ASSERT(bio); + ASSERT(!bio->bi_status); + /* + * we have verified the checksum already, set page + * checked so the end_io handlers know about it + */ + ASSERT(!bio_flagged(bio, BIO_CLONED)); + bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) + SetPageChecked(bvec->bv_page); + + bio_endio(cb->orig_bio); + } + + /* finally free the cb struct */ + kfree(cb->compressed_pages); + kfree(cb); +} + /* when we finish reading compressed pages from the disk, we * decompress them and then run the bio end_io routines on the * decompressed pages (in the inode address space).@@ -249,8 +294,6 @@ static void end_compressed_bio_read(struct bio *bio) { struct compressed_bio *cb = bio->bi_private; struct inode *inode; - struct page *page; - unsigned int index; unsigned int mirror = btrfs_io_bio(bio)->mirror_num; int ret = 0;@@ -285,36 +328,7 @@ static void end_compressed_bio_read(struct bio *bio) csum_failed: if (ret) cb->errors = 1; - - /* release the compressed pages */ - index = 0; - for (index = 0; index < cb->nr_pages; index++) { - page = cb->compressed_pages[index]; - page->mapping = NULL; - put_page(page); - } - - /* do io completion on the original bio */ - if (cb->errors) { - bio_io_error(cb->orig_bio); - } else { - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - /* - * we have verified the checksum already, set page - * checked so the end_io handlers know about it - */ - ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) - SetPageChecked(bvec->bv_page); - - bio_endio(cb->orig_bio); - } - - /* finally free the cb struct */ - kfree(cb->compressed_pages); - kfree(cb); + finish_compressed_bio_read(cb, bio); out: bio_put(bio); }@@ -440,6 +454,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, return BLK_STS_RESOURCE; atomic_set(&cb->pending_bios, 0); atomic_set(&cb->io_sectors, compressed_len >> fs_info->sectorsize_bits); + init_waitqueue_head(&cb->pending_bio_wait); cb->errors = 0; cb->inode = &inode->vfs_inode; cb->start = start;@@ -723,6 +738,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, atomic_set(&cb->pending_bios, 0); atomic_set(&cb->io_sectors, compressed_len >> fs_info->sectorsize_bits); + init_waitqueue_head(&cb->pending_bio_wait); cb->errors = 0; cb->inode = inode; cb->mirror_num = mirror_num;@@ -798,20 +814,20 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto finish_cb; ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto finish_cb; nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size, fs_info->sectorsize); sums += fs_info->csum_size * nr_sectors; ret = btrfs_map_bio(fs_info, comp_bio, mirror_num); - if (ret) { - comp_bio->bi_status = ret; - bio_endio(comp_bio); - } + if (ret) + goto finish_cb; comp_bio = btrfs_bio_alloc(cur_disk_byte); comp_bio->bi_opf = REQ_OP_READ;@@ -825,16 +841,16 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto last_bio; ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto last_bio; ret = btrfs_map_bio(fs_info, comp_bio, mirror_num); - if (ret) { - comp_bio->bi_status = ret; - bio_endio(comp_bio); - } + if (ret) + goto last_bio; return 0;@@ -850,6 +866,27 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, out: free_extent_map(em); return ret; +last_bio: + cb->errors = 1; + comp_bio->bi_status = ret; + /* This is the last bio, endio functions will free @cb */ + bio_endio(comp_bio); + return ret; +finish_cb: + cb->errors = 1; + if (comp_bio) { + comp_bio->bi_status = ret; + bio_endio(comp_bio); + } + /* + * Even with previous bio ended, we should still have io not yet + * submitted, thus need to finish @cb manually. + */ + ASSERT(atomic_read(&cb->io_sectors)); + wait_event(cb->pending_bio_wait, atomic_read(&cb->pending_bios) == 0); + /* Now we are the only one referring @cb, can finish it safely. */ + finish_compressed_bio_read(cb, NULL); + return ret; } /*diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 41dd0bf6d5db..6f6c14f83c74 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h@@ -39,6 +39,9 @@ struct compressed_bio { */ atomic_t io_sectors; + /* To wait for any submitted bio, used in error handling */ + wait_queue_head_t pending_bio_wait;
This adds 24 bytes to the structure and it's only used for error handling, so that does not seem justified enough. There are system-wide wait queues, shared with other subsystems but it looks like a better fit for the exceptional case of errors. See commit 6b2bb7265f0b62605 for more details, the change is otherwise trivial and the api functions are wait_var_event(&variable, condition) and wake_up_var(&variable), where the variable is a unique key which would be the compressed_bio.
+ /* Number of compressed pages in the array */ unsigned int nr_pages; -- 2.32.0