Re: [PATCH md-6.9 06/10] md/raid1: factor out read_first_rdev() from read_balance()
From: Yu Kuai <hidden>
Date: 2024-02-27 01:06:52
Also in:
lkml
Hi, 在 2024/02/26 22:16, Xiao Ni 写道:
On Thu, Feb 22, 2024 at 4:04 PM Yu Kuai [off-list ref] wrote:quoted
From: Yu Kuai <redacted> read_balance() is hard to understand because there are too many status and branches, and it's overlong. This patch factor out the case to read the first rdev from read_balance(), there are no functional changes. Co-developed-by: Paul Luse <redacted> Signed-off-by: Paul Luse <redacted> Signed-off-by: Yu Kuai <redacted> --- drivers/md/raid1.c | 63 +++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 17 deletions(-)diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 8089c569e84f..08c45ca55a7e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c@@ -579,6 +579,47 @@ static sector_t align_to_barrier_unit_end(sector_t start_sector, return len; } +static void update_read_sectors(struct r1conf *conf, int disk, + sector_t this_sector, int len) +{ + struct raid1_info *info = &conf->mirrors[disk]; + + atomic_inc(&info->rdev->nr_pending); + if (info->next_seq_sect != this_sector) + info->seq_start = this_sector; + info->next_seq_sect = this_sector + len; +} + +static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, + int *max_sectors) +{ + sector_t this_sector = r1_bio->sector; + int len = r1_bio->sectors; + int disk; + + for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) { + struct md_rdev *rdev; + int read_len; + + if (r1_bio->bios[disk] == IO_BLOCKED) + continue; + + rdev = conf->mirrors[disk].rdev; + if (!rdev || test_bit(Faulty, &rdev->flags)) + continue; + + /* choose the first disk even if it has some bad blocks. */ + read_len = raid1_check_read_range(rdev, this_sector, &len); + if (read_len > 0) { + update_read_sectors(conf, disk, this_sector, read_len); + *max_sectors = read_len; + return disk; + }Hi Kuai It needs to update max_sectors even if the bad block starts before this_sector. Because it can't read more than bad_blocks from other member disks. If it reads more data than bad blocks, it will cause data corruption. One rule here is read from the primary disk (the first readable disk) if it has no bad block and read the badblock-data-length data from other disks.
Noted that raid1_check_read_range() will return readable length from this rdev, hence if bad block starts before this_sector, 0 is returned, and 'len' is updated to the length of badblocks(if not exceed read range), and following iteration will find the first disk to read updated 'len' data and update max_sectors. Thanks, Kuai
Best Regards Xiaoquoted
+ } + + return -1; +} + /* * This routine returns the disk from which the requested read should * be done. There is a per-array 'next expected sequential IO' sector@@ -603,7 +644,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect sector_t best_dist; unsigned int min_pending; struct md_rdev *rdev; - int choose_first; retry: sectors = r1_bio->sectors;@@ -613,10 +653,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect best_pending_disk = -1; min_pending = UINT_MAX; best_good_sectors = 0; - choose_first = raid1_should_read_first(conf->mddev, this_sector, - sectors); clear_bit(R1BIO_FailFast, &r1_bio->state); + if (raid1_should_read_first(conf->mddev, this_sector, sectors)) + return choose_first_rdev(conf, r1_bio, max_sectors); + for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) { sector_t dist; sector_t first_bad;@@ -662,8 +703,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect * bad_sectors from another device.. */ bad_sectors -= (this_sector - first_bad); - if (choose_first && sectors > bad_sectors) - sectors = bad_sectors; if (best_good_sectors > sectors) best_good_sectors = sectors;@@ -673,8 +712,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect best_good_sectors = good_sectors; best_disk = disk; } - if (choose_first) - break; } continue; } else {@@ -689,10 +726,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect pending = atomic_read(&rdev->nr_pending); dist = abs(this_sector - conf->mirrors[disk].head_position); - if (choose_first) { - best_disk = disk; - break; - } /* Don't change to another disk for sequential reads */ if (conf->mirrors[disk].next_seq_sect == this_sector || dist == 0) {@@ -760,13 +793,9 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect rdev = conf->mirrors[best_disk].rdev; if (!rdev) goto retry; - atomic_inc(&rdev->nr_pending); - sectors = best_good_sectors; - - if (conf->mirrors[best_disk].next_seq_sect != this_sector) - conf->mirrors[best_disk].seq_start = this_sector; - conf->mirrors[best_disk].next_seq_sect = this_sector + sectors; + sectors = best_good_sectors; + update_read_sectors(conf, disk, this_sector, sectors); } *max_sectors = sectors; --2.39.2.