Re: [md PATCH 2/2] md: be cautious about using ->curr_resync_completed for ->recovery_offset
From: NeilBrown <hidden>
Date: 2017-11-01 01:13:22
Subsystem:
software raid (multiple disks) support, the rest · Maintainers:
Song Liu, Yu Kuai, Linus Torvalds
On Tue, Oct 31 2017, Tomasz Majchrzak wrote:
On Tue, Oct 17, 2017 at 04:18:36PM +1100, NeilBrown wrote:quoted
The ->recovery_offset shows how much of a non-InSync device is actually in sync - how much has been recoveryed. When performing a recovery, ->curr_resync and ->curr_resync_completed follow the device address being recovered and so can be used to update ->recovery_offset. When performing a reshape, ->curr_resync* might follow the device addresses (raid5) or might follow array addresses (raid10), so cannot in general be used to set ->recovery_offset. When reshaping backwards, ->curre_resync* measures from the *end* of the array-or-device, so is particularly unhelpful. So change the common code in md.c to only use ->curr_resync_complete for the simple recovery case, and add code to raid5.c to update ->recovery_offset during a forwards reshape. Signed-off-by: NeilBrown <redacted> --- drivers/md/md.c | 32 +++++++++++++++++++++----------- drivers/md/raid5.c | 18 ++++++++++++++++++ 2 files changed, 39 insertions(+), 11 deletions(-)diff --git a/drivers/md/md.c b/drivers/md/md.c index a9f1352b3849..d1dfc9879368 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c@@ -2453,10 +2453,17 @@ void md_update_sb(struct mddev *mddev, int force_change) } } - /* First make sure individual recovery_offsets are correct */ + /* First make sure individual recovery_offsets are correct + * curr_resync_completed can only be used during recovery. + * During reshape/resync it might use array-addresses rather + * that device addresses. + */ rdev_for_each(rdev, mddev) { if (rdev->raid_disk >= 0 && mddev->delta_disks >= 0 && + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && + test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) && + !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && !test_bit(Journal, &rdev->flags) && !test_bit(In_sync, &rdev->flags) && mddev->curr_resync_completed > rdev->recovery_offset)@@ -8507,16 +8514,19 @@ void md_do_sync(struct md_thread *thread) } else { if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) mddev->curr_resync = MaxSector; - rcu_read_lock(); - rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(Journal, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < mddev->curr_resync) - rdev->recovery_offset = mddev->curr_resync; - rcu_read_unlock(); + if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && + test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) { + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) + if (rdev->raid_disk >= 0 && + mddev->delta_disks >= 0 && + !test_bit(Journal, &rdev->flags) && + !test_bit(Faulty, &rdev->flags) && + !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < mddev->curr_resync) + rdev->recovery_offset = mddev->curr_resync; + rcu_read_unlock(); + } } } skip:diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a8df52130f8a..89ad79614309 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c@@ -5739,6 +5739,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk */ struct r5conf *conf = mddev->private; struct stripe_head *sh; + struct md_rdev *rdev; sector_t first_sector, last_sector; int raid_disks = conf->previous_raid_disks; int data_disks = raid_disks - conf->max_degraded;@@ -5861,6 +5862,15 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk return 0; mddev->reshape_position = conf->reshape_progress; mddev->curr_resync_completed = sector_nr; + if (!mddev->reshape_backwards) + /* Can update recovery_offset */ + rdev_for_each(rdev, mddev) + if (rdev->raid_disk >= 0 && + !test_bit(Journal, &rdev->flags) && + !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < sector_nr) + rdev->recovery_offset = sector_nr; + conf->reshape_checkpoint = jiffies; set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); md_wakeup_thread(mddev->thread);@@ -5959,6 +5969,14 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk goto ret; mddev->reshape_position = conf->reshape_progress; mddev->curr_resync_completed = sector_nr; + if (!mddev->reshape_backwards) + /* Can update recovery_offset */ + rdev_for_each(rdev, mddev) + if (rdev->raid_disk >= 0 && + !test_bit(Journal, &rdev->flags) && + !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < sector_nr) + rdev->recovery_offset = sector_nr; conf->reshape_checkpoint = jiffies; set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); md_wakeup_thread(mddev->thread); --Hi, This patch has caused a regression in following IMSM RAID scenario:
That's unfortunate.
mdadm --create /dev/md/imsm0 --metadata=imsm --raid-devices=4 /dev/nvme[0-3]n1 --run
mdadm --create /dev/md/r10 --level=10 --size=100M --raid-devices=4 /dev/nvme[0-3]n1 --run --assume-clean
MDADM_EXPERIMENTAL=1 mdadm --grow /dev/md/r10 --level=0
MDADM_EXPERIMENTAL=1 mdadm --grow /dev/md/imsm0 --raid-devices=4
cat /proc/mdstat
Personalities : [raid10] [raid0] [raid6] [raid5] [raid4]
md126 : active raid4 nvme0n1[3] nvme2n1[2] nvme1n1[1]
409600 blocks super external:/md127/0 level 4, 128k chunk, algorithm 0 [5/3] [UU_U_]
md127 : inactive nvme3n1[3](S) nvme2n1[2](S) nvme1n1[1](S) nvme0n1[0](S)
4420 blocks super external:imsmWhy isn't nvme3n1 in md126? Did it not get added, or did it get removed for some reason? If it wasn't added, raid5_start_reshape() would have failed, so I guess it must have been removed??
The array should have been reshaped to 4-disk RAID0 but it has failed in the middle of reshape as RAID4. Following condition is not true for above scenario:quoted
+ if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && + test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
No, it isn't meant to be. This code doesn't work reliably in the reshape case. When reshaping raid5 (or raid4), reshape_request updates rdev->recovery_offset. ... maybe end_reshape needs to update it too. If you add this patch, does it help? Thanks, NeilBrown
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 89ad79614309..a99ae1be9175 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c@@ -7964,6 +7964,7 @@ static void end_reshape(struct r5conf *conf) { if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { + struct md_rdev *rdev; spin_lock_irq(&conf->device_lock); conf->previous_raid_disks = conf->raid_disks;
@@ -7971,6 +7972,12 @@ static void end_reshape(struct r5conf *conf) smp_wmb(); conf->reshape_progress = MaxSector; conf->mddev->reshape_position = MaxSector; + rdev_for_each(rdev, conf->mddev) + if (rdev->raid_disk >= 0 && + !test_bit(Journal, &rdev->flags) && + !test_bit(In_sync, &rdev->flags)) + rdev->recovery_offset = MaxSector; + spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap);
Attachments
- signature.asc [application/pgp-signature] 832 bytes