Re: [PATCH md-6.9 06/10] md/raid1: factor out read_first_rdev() from read_balance()

From: Yu Kuai <hidden>
Date: 2024-02-27 01:06:52
Also in: lkml

Hi,

在 2024/02/26 22:16, Xiao Ni 写道:

On Thu, Feb 22, 2024 at 4:04 PM Yu Kuai [off-list ref] wrote:

quoted

From: Yu Kuai <redacted>

read_balance() is hard to understand because there are too many states
and branches, and it is too long.

This patch factors out the case of reading the first rdev from
read_balance(); there are no functional changes.

Co-developed-by: Paul Luse <redacted>
Signed-off-by: Paul Luse <redacted>
Signed-off-by: Yu Kuai <redacted>
---
  drivers/md/raid1.c | 63 +++++++++++++++++++++++++++++++++-------------
  1 file changed, 46 insertions(+), 17 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 8089c569e84f..08c45ca55a7e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c

@@ -579,6 +579,47 @@ static sector_t align_to_barrier_unit_end(sector_t start_sector,
         return len;
  }

+static void update_read_sectors(struct r1conf *conf, int disk,
+                               sector_t this_sector, int len)
+{
+       struct raid1_info *info = &conf->mirrors[disk];
+
+       atomic_inc(&info->rdev->nr_pending);
+       if (info->next_seq_sect != this_sector)
+               info->seq_start = this_sector;
+       info->next_seq_sect = this_sector + len;
+}
+
+static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio,
+                            int *max_sectors)
+{
+       sector_t this_sector = r1_bio->sector;
+       int len = r1_bio->sectors;
+       int disk;
+
+       for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
+               struct md_rdev *rdev;
+               int read_len;
+
+               if (r1_bio->bios[disk] == IO_BLOCKED)
+                       continue;
+
+               rdev = conf->mirrors[disk].rdev;
+               if (!rdev || test_bit(Faulty, &rdev->flags))
+                       continue;
+
+               /* choose the first disk even if it has some bad blocks. */
+               read_len = raid1_check_read_range(rdev, this_sector, &len);
+               if (read_len > 0) {
+                       update_read_sectors(conf, disk, this_sector, read_len);
+                       *max_sectors = read_len;
+                       return disk;
+               }

Hi Kuai

It needs to update max_sectors even if the bad block starts before
this_sector, because it can't read more than the bad-block length from the
other member disks. If it reads more data than the bad blocks cover, it will
cause data corruption. One rule here is to read from the primary disk (the
first readable disk) if it has no bad block, and to read only the
bad-block-length data from the other disks.

Note that raid1_check_read_range() returns the readable length from
this rdev. Hence, if a bad block starts before this_sector, 0 is returned
and 'len' is updated to the length of the bad blocks (if that does not
exceed the read range). The following iterations will then find the first
disk that can read the updated 'len' of data and update max_sectors.

Thanks,
Kuai

Best Regards
Xiao

quoted

+       }
+
+       return -1;
+}
+
  /*
   * This routine returns the disk from which the requested read should
   * be done. There is a per-array 'next expected sequential IO' sector

@@ -603,7 +644,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
         sector_t best_dist;
         unsigned int min_pending;
         struct md_rdev *rdev;
-       int choose_first;

   retry:
         sectors = r1_bio->sectors;
@@ -613,10 +653,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
         best_pending_disk = -1;
         min_pending = UINT_MAX;
         best_good_sectors = 0;
-       choose_first = raid1_should_read_first(conf->mddev, this_sector,
-                                              sectors);
         clear_bit(R1BIO_FailFast, &r1_bio->state);

+       if (raid1_should_read_first(conf->mddev, this_sector, sectors))
+               return choose_first_rdev(conf, r1_bio, max_sectors);
+
         for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
                 sector_t dist;
                 sector_t first_bad;
@@ -662,8 +703,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                                  * bad_sectors from another device..
                                  */
                                 bad_sectors -= (this_sector - first_bad);
-                               if (choose_first && sectors > bad_sectors)
-                                       sectors = bad_sectors;
                                 if (best_good_sectors > sectors)
                                         best_good_sectors = sectors;
@@ -673,8 +712,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                                         best_good_sectors = good_sectors;
                                         best_disk = disk;
                                 }
-                               if (choose_first)
-                                       break;
                         }
                         continue;
                 } else {
@@ -689,10 +726,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect

                 pending = atomic_read(&rdev->nr_pending);
                 dist = abs(this_sector - conf->mirrors[disk].head_position);
-               if (choose_first) {
-                       best_disk = disk;
-                       break;
-               }
                 /* Don't change to another disk for sequential reads */
                 if (conf->mirrors[disk].next_seq_sect == this_sector
                     || dist == 0) {
@@ -760,13 +793,9 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                 rdev = conf->mirrors[best_disk].rdev;
                 if (!rdev)
                         goto retry;
-               atomic_inc(&rdev->nr_pending);
-               sectors = best_good_sectors;
-
-               if (conf->mirrors[best_disk].next_seq_sect != this_sector)
-                       conf->mirrors[best_disk].seq_start = this_sector;

-               conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
+               sectors = best_good_sectors;
+               update_read_sectors(conf, disk, this_sector, sectors);
         }
         *max_sectors = sectors;

--

2.39.2

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help