Re: [PATCH 19/26] Btrfs: add code to scrub to copy read data to another disk

From: Tsutomu Itoh <hidden>
Date: 2012-11-07 00:31:22

(2012/11/07 1:38), Stefan Behrens wrote:

quoted hunk ↗ jump to hunk

The device replace procedure makes use of the scrub code. The scrub
code is the most efficient code to read the allocated data of a disk,
i.e. it reads sequentially in order to avoid disk head movements, it
skips unallocated blocks, it uses read ahead mechanisms, and it
contains all the code to detect and repair defects.
This commit adds code to scrub to allow the scrub code to copy read
data to another disk.
One goal is to be able to perform as fast as possible. Therefore the
write requests are collected until huge bios are built, and the
write process is decoupled from the read process with some kind of
flow control, of course, in order to limit the allocated memory.
The best performance on spinning disks can be reached when the
head movements are avoided as much as possible. Therefore a single
worker is used to interface the read process with the write process.
The regular scrub operation works as fast as before, it is not
negatively influenced and actually it is more or less unchanged.

Signed-off-by: Stefan Behrens <redacted>
---
  fs/btrfs/ctree.h |   2 +
  fs/btrfs/reada.c |  10 +-
  fs/btrfs/scrub.c | 881 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
  fs/btrfs/super.c |   3 +-
  4 files changed, 823 insertions(+), 73 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 83904b5..e17f211 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h

@@ -1483,6 +1483,8 @@ struct btrfs_fs_info {
  	struct rw_semaphore scrub_super_lock;
  	int scrub_workers_refcnt;
  	struct btrfs_workers scrub_workers;
+	struct btrfs_workers scrub_wr_completion_workers;
+	struct btrfs_workers scrub_nocow_workers;
  
  #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
  	u32 check_integrity_print_mask;

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 0ddc565..9f363e1 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c

@@ -418,12 +418,17 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
  			 */
  			continue;
  		}
+		if (!dev->bdev) {
+			/* cannot read ahead on missing device */
+			continue;
+		}
  		prev_dev = dev;
  		ret = radix_tree_insert(&dev->reada_extents, index, re);
  		if (ret) {
  			while (--i >= 0) {
  				dev = bbio->stripes[i].dev;
  				BUG_ON(dev == NULL);
+				/* ignore whether the entry was inserted */
  				radix_tree_delete(&dev->reada_extents, index);
  			}
  			BUG_ON(fs_info == NULL);

@@ -914,7 +919,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
  	generation = btrfs_header_generation(node);
  	free_extent_buffer(node);
  
-	reada_add_block(rc, start, &max_key, level, generation);
+	if (reada_add_block(rc, start, &max_key, level, generation)) {
+		kfree(rc);
+		return ERR_PTR(-ENOMEM);
+	}
  
  	reada_start_machine(root->fs_info);

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 460e30b..59c69e0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c

@@ -25,6 +25,7 @@
  #include "transaction.h"
  #include "backref.h"
  #include "extent_io.h"
+#include "dev-replace.h"
  #include "check-integrity.h"
  #include "rcu-string.h"

@@ -44,8 +45,15 @@
  struct scrub_block;
  struct scrub_ctx;
  
-#define SCRUB_PAGES_PER_BIO	16	/* 64k per bio */
-#define SCRUB_BIOS_PER_CTX	16	/* 1 MB per device in flight */
+/*
+ * the following three values only influence the performance.
+ * The last one configures the number of parallel and outstanding I/O
+ * operations. The first two values configure an upper limit for the number
+ * of (dynamically allocated) pages that are added to a bio.
+ */
+#define SCRUB_PAGES_PER_RD_BIO	32	/* 128k per bio */
+#define SCRUB_PAGES_PER_WR_BIO	32	/* 128k per bio */
+#define SCRUB_BIOS_PER_SCTX	64	/* 8MB per device in flight */
  
  /*
   * the following value times PAGE_SIZE needs to be large enough to match the

@@ -62,6 +70,7 @@ struct scrub_page {
  	u64			generation;
  	u64			logical;
  	u64			physical;
+	u64			physical_for_dev_replace;
  	atomic_t		ref_count;
  	struct {
  		unsigned int	mirror_num:8;

@@ -79,7 +88,11 @@ struct scrub_bio {
  	int			err;
  	u64			logical;
  	u64			physical;
-	struct scrub_page	*pagev[SCRUB_PAGES_PER_BIO];
+#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
+	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
+#else
+	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
+#endif
  	int			page_count;
  	int			next_free;
  	struct btrfs_work	work;

@@ -99,8 +112,16 @@ struct scrub_block {
  	};
  };
  
+struct scrub_wr_ctx {
+	struct scrub_bio *wr_curr_bio;
+	struct btrfs_device *tgtdev;
+	int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
+	atomic_t flush_all_writes;
+	struct mutex wr_lock;
+};
+
  struct scrub_ctx {
-	struct scrub_bio	*bios[SCRUB_BIOS_PER_CTX];
+	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
  	struct btrfs_root	*dev_root;
  	int			first_free;
  	int			curr;

@@ -112,12 +133,13 @@ struct scrub_ctx {
  	struct list_head	csum_list;
  	atomic_t		cancel_req;
  	int			readonly;
-	int			pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */
+	int			pages_per_rd_bio;
  	u32			sectorsize;
  	u32			nodesize;
  	u32			leafsize;
  
  	int			is_dev_replace;
+	struct scrub_wr_ctx	wr_ctx;
  
  	/*
  	 * statistics

@@ -135,6 +157,15 @@ struct scrub_fixup_nodatasum {
  	int			mirror_num;
  };
  
+struct scrub_copy_nocow_ctx {
+	struct scrub_ctx	*sctx;
+	u64			logical;
+	u64			len;
+	int			mirror_num;
+	u64			physical_for_dev_replace;
+	struct btrfs_work	work;
+};
+
  struct scrub_warning {
  	struct btrfs_path	*path;
  	u64			extent_item_size;

@@ -156,8 +187,9 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
  static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
  static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
  				     struct btrfs_fs_info *fs_info,
+				     struct scrub_block *original_sblock,
  				     u64 length, u64 logical,
-				     struct scrub_block *sblock);
+				     struct scrub_block *sblocks_for_recheck);
  static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
  				struct scrub_block *sblock, int is_metadata,
  				int have_csum, u8 *csum, u64 generation,

@@ -174,6 +206,9 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
  static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
  					    struct scrub_block *sblock_good,
  					    int page_num, int force_write);
+static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
+static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
+					   int page_num);
  static int scrub_checksum_data(struct scrub_block *sblock);
  static int scrub_checksum_tree_block(struct scrub_block *sblock);
  static int scrub_checksum_super(struct scrub_block *sblock);

@@ -181,14 +216,38 @@ static void scrub_block_get(struct scrub_block *sblock);
  static void scrub_block_put(struct scrub_block *sblock);
  static void scrub_page_get(struct scrub_page *spage);
  static void scrub_page_put(struct scrub_page *spage);
-static int scrub_add_page_to_bio(struct scrub_ctx *sctx,
-				 struct scrub_page *spage);
+static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
+				    struct scrub_page *spage);
  static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
  		       u64 physical, struct btrfs_device *dev, u64 flags,
-		       u64 gen, int mirror_num, u8 *csum, int force);
+		       u64 gen, int mirror_num, u8 *csum, int force,
+		       u64 physical_for_dev_replace);
  static void scrub_bio_end_io(struct bio *bio, int err);
  static void scrub_bio_end_io_worker(struct btrfs_work *work);
  static void scrub_block_complete(struct scrub_block *sblock);
+static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
+			       u64 extent_logical, u64 extent_len,
+			       u64 *extent_physical,
+			       struct btrfs_device **extent_dev,
+			       int *extent_mirror_num);
+static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
+			      struct scrub_wr_ctx *wr_ctx,
+			      struct btrfs_fs_info *fs_info,
+			      struct btrfs_device *dev,
+			      int is_dev_replace);
+static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
+static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
+				    struct scrub_page *spage);
+static void scrub_wr_submit(struct scrub_ctx *sctx);
+static void scrub_wr_bio_end_io(struct bio *bio, int err);
+static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
+static int write_page_nocow(struct scrub_ctx *sctx,
+			    u64 physical_for_dev_replace, struct page *page);
+static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
+				      void *ctx);
+static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
+			    int mirror_num, u64 physical_for_dev_replace);
+static void copy_nocow_pages_worker(struct btrfs_work *work);
  
  
  static void scrub_pending_bio_inc(struct scrub_ctx *sctx)

@@ -262,19 +321,20 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
  	if (!sctx)
  		return;
  
+	scrub_free_wr_ctx(&sctx->wr_ctx);
+
  	/* this can happen when scrub is cancelled */
  	if (sctx->curr != -1) {
  		struct scrub_bio *sbio = sctx->bios[sctx->curr];
  
  		for (i = 0; i < sbio->page_count; i++) {
-			BUG_ON(!sbio->pagev[i]);
-			BUG_ON(!sbio->pagev[i]->page);
+			WARN_ON(!sbio->pagev[i]->page);
  			scrub_block_put(sbio->pagev[i]->sblock);
  		}
  		bio_put(sbio->bio);
  	}
  
-	for (i = 0; i < SCRUB_BIOS_PER_CTX; ++i) {
+	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
  		struct scrub_bio *sbio = sctx->bios[i];
  
  		if (!sbio)

@@ -292,18 +352,29 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
  	struct scrub_ctx *sctx;
  	int		i;
  	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
-	int pages_per_bio;
+	int pages_per_rd_bio;
+	int ret;
  
-	pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO,
-			      bio_get_nr_vecs(dev->bdev));
+	/*
+	 * the setting of pages_per_rd_bio is correct for scrub but might
+	 * be wrong for the dev_replace code where we might read from
+	 * different devices in the initial huge bios. However, that
+	 * code is able to correctly handle the case when adding a page
+	 * to a bio fails.
+	 */
+	if (dev->bdev)
+		pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
+					 bio_get_nr_vecs(dev->bdev));
+	else
+		pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
  	sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
  	if (!sctx)
  		goto nomem;
  	sctx->is_dev_replace = is_dev_replace;
-	sctx->pages_per_bio = pages_per_bio;
+	sctx->pages_per_rd_bio = pages_per_rd_bio;
  	sctx->curr = -1;
  	sctx->dev_root = dev->dev_root;
-	for (i = 0; i < SCRUB_BIOS_PER_CTX; ++i) {
+	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
  		struct scrub_bio *sbio;
  
  		sbio = kzalloc(sizeof(*sbio), GFP_NOFS);

@@ -316,7 +387,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
  		sbio->page_count = 0;
  		sbio->work.func = scrub_bio_end_io_worker;
  
-		if (i != SCRUB_BIOS_PER_CTX - 1)
+		if (i != SCRUB_BIOS_PER_SCTX - 1)
  			sctx->bios[i]->next_free = i + 1;
  		else
  			sctx->bios[i]->next_free = -1;

@@ -334,6 +405,13 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
  	spin_lock_init(&sctx->list_lock);
  	spin_lock_init(&sctx->stat_lock);
  	init_waitqueue_head(&sctx->list_wait);
+
+	ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
+				 fs_info->dev_replace.tgtdev, is_dev_replace);
+	if (ret) {
+		scrub_free_ctx(sctx);
+		return ERR_PTR(ret);
+	}
  	return sctx;
  
  nomem:

@@ -341,7 +419,8 @@ nomem:
  	return ERR_PTR(-ENOMEM);
  }
  
-static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
+static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
+				     void *warn_ctx)
  {
  	u64 isize;
  	u32 nlink;

@@ -349,7 +428,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
  	int i;
  	struct extent_buffer *eb;
  	struct btrfs_inode_item *inode_item;
-	struct scrub_warning *swarn = ctx;
+	struct scrub_warning *swarn = warn_ctx;
  	struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
  	struct inode_fs_paths *ipath = NULL;
  	struct btrfs_root *local_root;

@@ -492,11 +571,11 @@ out:
  	kfree(swarn.msg_buf);
  }
  
-static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx)
+static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
  {
  	struct page *page = NULL;
  	unsigned long index;
-	struct scrub_fixup_nodatasum *fixup = ctx;
+	struct scrub_fixup_nodatasum *fixup = fixup_ctx;
  	int ret;
  	int corrected = 0;
  	struct btrfs_key key;

@@ -660,7 +739,9 @@ out:
  		spin_lock(&sctx->stat_lock);
  		++sctx->stat.uncorrectable_errors;
  		spin_unlock(&sctx->stat_lock);
-
+		btrfs_dev_replace_stats_inc(
+			&sctx->dev_root->fs_info->dev_replace.
+			num_uncorrectable_read_errors);
  		printk_ratelimited_in_rcu(KERN_ERR
  			"btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
  			(unsigned long long)fixup->logical,

@@ -715,6 +796,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
  	csum = sblock_to_check->pagev[0]->csum;
  	dev = sblock_to_check->pagev[0]->dev;
  
+	if (sctx->is_dev_replace && !is_metadata && !have_csum) {
+		sblocks_for_recheck = NULL;
+		goto nodatasum_case;
+	}
+
  	/*
  	 * read all mirrors one after the other. This includes to
  	 * re-read the extent or metadata block that failed (that was

@@ -758,7 +844,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
  	}
  
  	/* setup the context, map the logical blocks and alloc the pages */
-	ret = scrub_setup_recheck_block(sctx, fs_info, length,
+	ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
  					logical, sblocks_for_recheck);
  	if (ret) {
  		spin_lock(&sctx->stat_lock);

@@ -789,6 +875,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
  		sctx->stat.unverified_errors++;
  		spin_unlock(&sctx->stat_lock);
  
+		if (sctx->is_dev_replace)
+			scrub_write_block_to_dev_replace(sblock_bad);
  		goto out;
  	}

@@ -822,12 +910,15 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
  				BTRFS_DEV_STAT_CORRUPTION_ERRS);
  	}
  
-	if (sctx->readonly)
+	if (sctx->readonly && !sctx->is_dev_replace)
  		goto did_not_correct_error;
  
  	if (!is_metadata && !have_csum) {
  		struct scrub_fixup_nodatasum *fixup_nodatasum;
  
+nodatasum_case:
+		WARN_ON(sctx->is_dev_replace);
+
  		/*
  		 * !is_metadata and !have_csum, this means that the data
  		 * might not be COW'ed, that it might be modified

@@ -883,18 +974,79 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
  		if (!sblock_other->header_error &&
  		    !sblock_other->checksum_error &&
  		    sblock_other->no_io_error_seen) {
-			int force_write = is_metadata || have_csum;
-
-			ret = scrub_repair_block_from_good_copy(sblock_bad,
-								sblock_other,
-								force_write);
+			if (sctx->is_dev_replace) {
+				scrub_write_block_to_dev_replace(sblock_other);
+			} else {
+				int force_write = is_metadata || have_csum;
+
+				ret = scrub_repair_block_from_good_copy(
+						sblock_bad, sblock_other,
+						force_write);
+			}
  			if (0 == ret)
  				goto corrected_error;
  		}
  	}
  
  	/*
-	 * in case of I/O errors in the area that is supposed to be
+	 * for dev_replace, pick good pages and write to the target device.
+	 */
+	if (sctx->is_dev_replace) {
+		success = 1;
+		for (page_num = 0; page_num < sblock_bad->page_count;
+		     page_num++) {
+			int sub_success;
+
+			sub_success = 0;
+			for (mirror_index = 0;
+			     mirror_index < BTRFS_MAX_MIRRORS &&
+			     sblocks_for_recheck[mirror_index].page_count > 0;
+			     mirror_index++) {
+				struct scrub_block *sblock_other =
+					sblocks_for_recheck + mirror_index;
+				struct scrub_page *page_other =
+					sblock_other->pagev[page_num];
+
+				if (!page_other->io_error) {
+					ret = scrub_write_page_to_dev_replace(
+							sblock_other, page_num);
+					if (ret == 0) {
+						/* succeeded for this page */
+						sub_success = 1;
+						break;
+					} else {
+						btrfs_dev_replace_stats_inc(
+							&sctx->dev_root->
+							fs_info->dev_replace.
+							num_write_errors);
+					}
+				}
+			}
+
+			if (!sub_success) {
+				/*
+				 * did not find a mirror to fetch the page
+				 * from. scrub_write_page_to_dev_replace()
+				 * handles this case (page->io_error), by
+				 * filling the block with zeros before
+				 * submitting the write request
+				 */
+				success = 0;
+				ret = scrub_write_page_to_dev_replace(
+						sblock_bad, page_num);
+				if (ret)
+					btrfs_dev_replace_stats_inc(
+						&sctx->dev_root->fs_info->
+						dev_replace.num_write_errors);
+			}
+		}
+
+		goto out;
+	}
+
+	/*
+	 * for regular scrub, repair those pages that are errored.
+	 * In case of I/O errors in the area that is supposed to be
  	 * repaired, continue by picking good copies of those pages.
  	 * Select the good pages from mirrors to rewrite bad pages from
  	 * the area to fix. Afterwards verify the checksum of the block

@@ -1017,6 +1169,7 @@ out:
  
  static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
  				     struct btrfs_fs_info *fs_info,
+				     struct scrub_block *original_sblock,
  				     u64 length, u64 logical,
  				     struct scrub_block *sblocks_for_recheck)
  {

@@ -1047,7 +1200,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
  			return -EIO;
  		}
  
-		BUG_ON(page_index >= SCRUB_PAGES_PER_BIO);
+		BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
  		for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
  		     mirror_index++) {
  			struct scrub_block *sblock;

@@ -1071,6 +1224,10 @@ leave_nomem:
  			sblock->pagev[page_index] = page;
  			page->logical = logical;
  			page->physical = bbio->stripes[mirror_index].physical;
+			BUG_ON(page_index >= original_sblock->page_count);
+			page->physical_for_dev_replace =
+				original_sblock->pagev[page_index]->
+				physical_for_dev_replace;
  			/* for missing devices, dev->bdev is NULL */
  			page->dev = bbio->stripes[mirror_index].dev;
  			page->mirror_num = mirror_index + 1;

@@ -1249,6 +1406,12 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
  		int ret;
  		DECLARE_COMPLETION_ONSTACK(complete);
  
+		if (!page_bad->dev->bdev) {
+			printk_ratelimited(KERN_WARNING
+				"btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n");
+			return -EIO;
+		}
+
  		bio = bio_alloc(GFP_NOFS, 1);
  		if (!bio)
  			return -EIO;

@@ -1269,6 +1432,9 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
  		if (!bio_flagged(bio, BIO_UPTODATE)) {
  			btrfs_dev_stat_inc_and_print(page_bad->dev,
  				BTRFS_DEV_STAT_WRITE_ERRS);
+			btrfs_dev_replace_stats_inc(
+				&sblock_bad->sctx->dev_root->fs_info->
+				dev_replace.num_write_errors);
  			bio_put(bio);
  			return -EIO;
  		}

@@ -1278,7 +1444,166 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
  	return 0;
  }
  
-static void scrub_checksum(struct scrub_block *sblock)
+static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
+{
+	int page_num;
+
+	for (page_num = 0; page_num < sblock->page_count; page_num++) {
+		int ret;
+
+		ret = scrub_write_page_to_dev_replace(sblock, page_num);
+		if (ret)
+			btrfs_dev_replace_stats_inc(
+				&sblock->sctx->dev_root->fs_info->dev_replace.
+				num_write_errors);
+	}
+}
+
+static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
+					   int page_num)
+{
+	struct scrub_page *spage = sblock->pagev[page_num];
+
+	BUG_ON(spage->page == NULL);
+	if (spage->io_error) {
+		void *mapped_buffer = kmap_atomic(spage->page);
+
+		memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
+		flush_dcache_page(spage->page);
+		kunmap_atomic(mapped_buffer);
+	}
+	return scrub_add_page_to_wr_bio(sblock->sctx, spage);
+}
+
+static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
+				    struct scrub_page *spage)
+{
+	struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
+	struct scrub_bio *sbio;
+	int ret;
+
+	mutex_lock(&wr_ctx->wr_lock);
+again:
+	if (!wr_ctx->wr_curr_bio) {
+		wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
+					      GFP_NOFS);
+		if (!wr_ctx->wr_curr_bio)

I think mutex_unlock(&wr_ctx->wr_lock) is necessary before it returns.

+			return -ENOMEM;
+		wr_ctx->wr_curr_bio->sctx = sctx;
+		wr_ctx->wr_curr_bio->page_count = 0;
+	}

...
...

- Tsutomu

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help