Thread (2 messages) 2 messages, 1 author, 2021-09-30

Re: [PATCH v7] btrfs: consolidate device_list_mutex in prepare_sprout to its parent

From: Anand Jain <hidden>
Date: 2021-09-30 13:42:30


On 30/09/2021 20:16, Anand Jain wrote:
btrfs_prepare_sprout() splices seed devices into its own struct fs_devices,
so that its parent function btrfs_init_new_device() can add the new sprout
device to fs_info->fs_devices.

Both btrfs_prepare_sprout() and btrfs_init_new_device() needs
device_list_mutex. But they are holding it sequentially, thus creates a
small window to an opportunity to race. Close this opportunity and hold
device_list_mutex common to both btrfs_init_new_device() and
btrfs_prepare_sprout().

This patch splits btrfs_prepare_sprout() into btrfs_alloc_sprout() and
btrfs_splice_sprout(). This split is essential because device_list_mutex
shouldn't be held for btrfs_alloc_sprout() but must be held for
btrfs_splice_sprout(). So now a common device_list_mutex can be used
between btrfs_init_new_device() and btrfs_splice_sprout().

s/btrfs_alloc_sprout/btrfs_init_sprout/g
s/btrfs_splice_sprout/btrfs_setup_sprout/g

The changelog did not follow the new function names. My bad.

Before I update these and send another reroll, I will wait for the 
comments, if any.

Thanks, Anand
quoted hunk ↗ jump to hunk
This patch also moves the lockdep_assert_held(&uuid_mutex) from the
starting of the function to just above the line where we need this lock.

Signed-off-by: Anand Jain <redacted>
---
v7:
  . Not part of the patchset "btrfs: cleanup prepare_sprout" anymore as
  1/3 is merged and 2/3 is dropped.
  . Rename btrfs_alloc_sprout() to btrfs_init_sprout() as it does more
  than just alloc and change return to btrfs_device *.
  . Rename btrfs_splice_sprout() to btrfs_setup_sprout() as it does more
  than just the splice.
  . Add lockdep_assert_held(&uuid_mutex) and
  lockdep_assert_held(&fs_devices->device_list_mutex) in btrfs_setup_sprout().

v6:
  Remove RFC.
  Split btrfs_prepare_sprout so that the allocation part can be outside
  of the device_list_mutex in the parent function btrfs_init_new_device().

  fs/btrfs/volumes.c | 73 +++++++++++++++++++++++++++++++++-------------
  1 file changed, 53 insertions(+), 20 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8e2b76b5fd14..10227b13a1a6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2378,21 +2378,14 @@ struct btrfs_device *btrfs_find_device_by_devspec(
  	return btrfs_find_device_by_path(fs_info, device_path);
  }
  
-/*
- * does all the dirty work required for changing file system's UUID.
- */
-static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
+static struct btrfs_fs_devices *btrfs_init_sprout(struct btrfs_fs_info *fs_info)
  {
  	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
  	struct btrfs_fs_devices *old_devices;
  	struct btrfs_fs_devices *seed_devices;
-	struct btrfs_super_block *disk_super = fs_info->super_copy;
-	struct btrfs_device *device;
-	u64 super_flags;
  
-	lockdep_assert_held(&uuid_mutex);
  	if (!fs_devices->seeding)
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
  
  	/*
  	 * Private copy of the seed devices, anchored at
@@ -2400,7 +2393,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
  	 */
  	seed_devices = alloc_fs_devices(NULL, NULL);
  	if (IS_ERR(seed_devices))
-		return PTR_ERR(seed_devices);
+		return seed_devices;
  
  	/*
  	 * It's necessary to retain a copy of the original seed fs_devices in
@@ -2411,9 +2404,10 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
  	old_devices = clone_fs_devices(fs_devices);
  	if (IS_ERR(old_devices)) {
  		kfree(seed_devices);
-		return PTR_ERR(old_devices);
+		return old_devices;
  	}
  
+	lockdep_assert_held(&uuid_mutex);
  	list_add(&old_devices->fs_list, &fs_uuids);
  
  	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
@@ -2422,7 +2416,41 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
  	INIT_LIST_HEAD(&seed_devices->alloc_list);
  	mutex_init(&seed_devices->device_list_mutex);
  
-	mutex_lock(&fs_devices->device_list_mutex);
+	return seed_devices;
+}
+
+/*
+ * Splice seed devices into the sprout fs_devices.
+ * Generate a new fsid for the sprouted readwrite btrfs.
+ */
+static void btrfs_setup_sprout(struct btrfs_fs_info *fs_info,
+			       struct btrfs_fs_devices *seed_devices)
+{
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	struct btrfs_super_block *disk_super = fs_info->super_copy;
+	struct btrfs_device *device;
+	u64 super_flags;
+
+	/*
+	 * We are updating the fsid, the thread leading to device_list_add()
+	 * could race, so uuid_mutex is needed.
+	 */
+	lockdep_assert_held(&uuid_mutex);
+
+	/*
+	 * Below threads though they parse dev_list they are fine without
+	 * device_list_mutex:
+	 *   All device ops and balance - as we are in btrfs_exclop_start.
+	 *   Various dev_list read parser - are using rcu.
+	 *   btrfs_ioctl_fitrim() - is using rcu.
+	 *
+	 * For-read threads as below are using device_list_mutex:
+	 *   Readonly scrub btrfs_scrub_dev()
+	 *   Readonly scrub btrfs_scrub_progress()
+	 *   btrfs_get_dev_stats()
+	 */
+	lockdep_assert_held(&fs_devices->device_list_mutex);
+
  	list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
  			      synchronize_rcu);
  	list_for_each_entry(device, &seed_devices->devices, dev_list)
@@ -2438,13 +2466,10 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
  	generate_random_uuid(fs_devices->fsid);
  	memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
  	memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
-	mutex_unlock(&fs_devices->device_list_mutex);
  
  	super_flags = btrfs_super_flags(disk_super) &
  		      ~BTRFS_SUPER_FLAG_SEEDING;
  	btrfs_set_super_flags(disk_super, super_flags);
-
-	return 0;
  }
  
  /*
@@ -2532,6 +2557,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
  	struct super_block *sb = fs_info->sb;
  	struct rcu_string *name;
  	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	struct btrfs_fs_devices *seed_devices;
  	u64 orig_super_total_bytes;
  	u64 orig_super_num_devices;
  	int ret = 0;
@@ -2615,18 +2641,25 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
  
  	if (seeding_dev) {
  		btrfs_clear_sb_rdonly(sb);
-		ret = btrfs_prepare_sprout(fs_info);
-		if (ret) {
-			btrfs_abort_transaction(trans, ret);
+
+		/* GFP_KERNEL alloc should not be under device_list_mutex */
+		seed_devices = btrfs_init_sprout(fs_info);
+		if (IS_ERR(seed_devices)) {
+			btrfs_abort_transaction(trans, (int)PTR_ERR(seed_devices));
  			goto error_trans;
  		}
+	}
+
+	mutex_lock(&fs_devices->device_list_mutex);
+	if (seeding_dev) {
+		btrfs_setup_sprout(fs_info, seed_devices);
+
  		btrfs_assign_next_active_device(fs_info->fs_devices->latest_dev,
  						device);
  	}
  
  	device->fs_devices = fs_devices;
  
-	mutex_lock(&fs_devices->device_list_mutex);
  	mutex_lock(&fs_info->chunk_mutex);
  	list_add_rcu(&device->dev_list, &fs_devices->devices);
  	list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
@@ -2688,7 +2721,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
  
  		/*
  		 * fs_devices now represents the newly sprouted filesystem and
-		 * its fsid has been changed by btrfs_prepare_sprout
+		 * its fsid has been changed by btrfs_sprout_splice().
  		 */
  		btrfs_sysfs_update_sprout_fsid(fs_devices);
  	}
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help