Re: [PATCH v7] btrfs: consolidate device_list_mutex in prepare_sprout to its parent
From: Anand Jain <hidden>
Date: 2021-09-30 13:42:30
On 30/09/2021 20:16, Anand Jain wrote:
btrfs_prepare_sprout() splices seed devices into its own struct fs_devices, so that its parent function btrfs_init_new_device() can add the new sprout device to fs_info->fs_devices. Both btrfs_prepare_sprout() and btrfs_init_new_device() needs device_list_mutex. But they are holding it sequentially, thus creates a small window to an opportunity to race. Close this opportunity and hold device_list_mutex common to both btrfs_init_new_device() and btrfs_prepare_sprout(). This patch splits btrfs_prepare_sprout() into btrfs_alloc_sprout() and btrfs_splice_sprout(). This split is essential because device_list_mutex shouldn't be held for btrfs_alloc_sprout() but must be held for btrfs_splice_sprout(). So now a common device_list_mutex can be used between btrfs_init_new_device() and btrfs_splice_sprout().
s/btrfs_alloc_sprout/btrfs_init_sprout/g s/btrfs_splice_sprout/btrfs_setup_sprout/g The changelog did not follow the new function names. My bad. Before I update these and send another reroll, I will wait for the comments, if any. Thanks, Anand
quoted hunk ↗ jump to hunk
This patch also moves the lockdep_assert_held(&uuid_mutex) from the starting of the function to just above the line where we need this lock. Signed-off-by: Anand Jain <redacted> --- v7: . Not part of the patchset "btrfs: cleanup prepare_sprout" anymore as 1/3 is merged and 2/3 is dropped. . Rename btrfs_alloc_sprout() to btrfs_init_sprout() as it does more than just alloc and change return to btrfs_device *. . Rename btrfs_splice_sprout() to btrfs_setup_sprout() as it does more than just the splice. . Add lockdep_assert_held(&uuid_mutex) and lockdep_assert_held(&fs_devices->device_list_mutex) in btrfs_setup_sprout(). v6: Remove RFC. Split btrfs_prepare_sprout so that the allocation part can be outside of the device_list_mutex in the parent function btrfs_init_new_device(). fs/btrfs/volumes.c | 73 +++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 20 deletions(-)diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8e2b76b5fd14..10227b13a1a6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c@@ -2378,21 +2378,14 @@ struct btrfs_device *btrfs_find_device_by_devspec( return btrfs_find_device_by_path(fs_info, device_path); } -/* - * does all the dirty work required for changing file system's UUID. - */ -static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) +static struct btrfs_fs_devices *btrfs_init_sprout(struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_fs_devices *old_devices; struct btrfs_fs_devices *seed_devices; - struct btrfs_super_block *disk_super = fs_info->super_copy; - struct btrfs_device *device; - u64 super_flags; - lockdep_assert_held(&uuid_mutex); if (!fs_devices->seeding) - return -EINVAL; + return ERR_PTR(-EINVAL); /* * Private copy of the seed devices, anchored at@@ -2400,7 +2393,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) */ seed_devices = alloc_fs_devices(NULL, NULL); if (IS_ERR(seed_devices)) - return PTR_ERR(seed_devices); + return seed_devices; /* * It's necessary to retain a copy of the original seed fs_devices in@@ -2411,9 +2404,10 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) old_devices = clone_fs_devices(fs_devices); if (IS_ERR(old_devices)) { kfree(seed_devices); - return PTR_ERR(old_devices); + return old_devices; } + lockdep_assert_held(&uuid_mutex); list_add(&old_devices->fs_list, &fs_uuids); memcpy(seed_devices, fs_devices, sizeof(*seed_devices));@@ -2422,7 +2416,41 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) INIT_LIST_HEAD(&seed_devices->alloc_list); mutex_init(&seed_devices->device_list_mutex); - mutex_lock(&fs_devices->device_list_mutex); + return seed_devices; +} + +/* + * Splice seed devices into the sprout fs_devices. + * Generate a new fsid for the sprouted readwrite btrfs. + */ +static void btrfs_setup_sprout(struct btrfs_fs_info *fs_info, + struct btrfs_fs_devices *seed_devices) +{ + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + struct btrfs_super_block *disk_super = fs_info->super_copy; + struct btrfs_device *device; + u64 super_flags; + + /* + * We are updating the fsid, the thread leading to device_list_add() + * could race, so uuid_mutex is needed. + */ + lockdep_assert_held(&uuid_mutex); + + /* + * Below threads though they parse dev_list they are fine without + * device_list_mutex: + * All device ops and balance - as we are in btrfs_exclop_start. + * Various dev_list read parser - are using rcu. + * btrfs_ioctl_fitrim() - is using rcu. + * + * For-read threads as below are using device_list_mutex: + * Readonly scrub btrfs_scrub_dev() + * Readonly scrub btrfs_scrub_progress() + * btrfs_get_dev_stats() + */ + lockdep_assert_held(&fs_devices->device_list_mutex); + list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, synchronize_rcu); list_for_each_entry(device, &seed_devices->devices, dev_list)@@ -2438,13 +2466,10 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) generate_random_uuid(fs_devices->fsid); memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE); memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); - mutex_unlock(&fs_devices->device_list_mutex); super_flags = btrfs_super_flags(disk_super) & ~BTRFS_SUPER_FLAG_SEEDING; btrfs_set_super_flags(disk_super, super_flags); - - return 0; } /*@@ -2532,6 +2557,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path struct super_block *sb = fs_info->sb; struct rcu_string *name; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + struct btrfs_fs_devices *seed_devices; u64 orig_super_total_bytes; u64 orig_super_num_devices; int ret = 0;@@ -2615,18 +2641,25 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (seeding_dev) { btrfs_clear_sb_rdonly(sb); - ret = btrfs_prepare_sprout(fs_info); - if (ret) { - btrfs_abort_transaction(trans, ret); + + /* GFP_KERNEL alloc should not be under device_list_mutex */ + seed_devices = btrfs_init_sprout(fs_info); + if (IS_ERR(seed_devices)) { + btrfs_abort_transaction(trans, (int)PTR_ERR(seed_devices)); goto error_trans; } + } + + mutex_lock(&fs_devices->device_list_mutex); + if (seeding_dev) { + btrfs_setup_sprout(fs_info, seed_devices); + btrfs_assign_next_active_device(fs_info->fs_devices->latest_dev, device); } device->fs_devices = fs_devices; - mutex_lock(&fs_devices->device_list_mutex); mutex_lock(&fs_info->chunk_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); list_add(&device->dev_alloc_list, &fs_devices->alloc_list);@@ -2688,7 +2721,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path /* * fs_devices now represents the newly sprouted filesystem and - * its fsid has been changed by btrfs_prepare_sprout + * its fsid has been changed by btrfs_sprout_splice(). */ btrfs_sysfs_update_sprout_fsid(fs_devices); }