Re: [md PATCH 10/14] md/raid1: stop using bi_phys_segment

From: Ming Lei <tom.leiming@gmail.com>
Date: 2017-02-20 10:57:38

On Thu, Feb 16, 2017 at 12:39 PM, NeilBrown [off-list ref] wrote:

quoted hunk ↗ jump to hunk

Change to use bio->__bi_remaining to count number of r1bio attached
to a bio.
See precious raid10 patch for more details.

inc_pending is a little more complicated in raid1 as we need to adjust
next_window_requests or current_window_requests.

The wait_event() call if start_next_window is no longer needed as each
r1_bio is completed separately.

Signed-off-by: NeilBrown <redacted>
---
 drivers/md/raid1.c |   99 ++++++++++++++++++----------------------------------
 1 file changed, 34 insertions(+), 65 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c1d0675880fb..5a57111c7bc9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c

@@ -241,36 +241,19 @@ static void reschedule_retry(struct r1bio *r1_bio)
 static void call_bio_endio(struct r1bio *r1_bio)
 {
        struct bio *bio = r1_bio->master_bio;
-       int done;
        struct r1conf *conf = r1_bio->mddev->private;
        sector_t start_next_window = r1_bio->start_next_window;
        sector_t bi_sector = bio->bi_iter.bi_sector;

-       if (bio->bi_phys_segments) {
-               unsigned long flags;
-               spin_lock_irqsave(&conf->device_lock, flags);
-               bio->bi_phys_segments--;
-               done = (bio->bi_phys_segments == 0);
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               /*
-                * make_request() might be waiting for
-                * bi_phys_segments to decrease
-                */
-               wake_up(&conf->wait_barrier);
-       } else
-               done = 1;
-
        if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
                bio->bi_error = -EIO;

-       if (done) {
-               bio_endio(bio);
-               /*
-                * Wake up any possible resync thread that waits for the device
-                * to go idle.
-                */
-               allow_barrier(conf, start_next_window, bi_sector);
-       }
+       bio_endio(bio);
+       /*
+        * Wake up any possible resync thread that waits for the device
+        * to go idle.
+        */
+       allow_barrier(conf, start_next_window, bi_sector);
 }

 static void raid_end_bio_io(struct r1bio *r1_bio)

@@ -923,6 +906,26 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
        return sector;
 }

+static void inc_pending(struct r1conf *conf, sector_t start_next_window,
+                       sector_t bi_sector)
+{
+       /* The current request requires multiple r1_bio, so
+        * we need to increment the pending count, and the corresponding
+        * window count.
+        */
+       spin_lock(&conf->resync_lock);
+       conf->nr_pending++;

Just be curious, in current code 'nr_pending' is increased in wait_barrier(),
and looks this patch introduces inc_pending() to do that on each r10_bio, but
not see any change in wait_barrier(), so that means there might be issue in
current implementation about operating on this counter?

quoted hunk ↗ jump to hunk

+       if (start_next_window == conf->start_next_window) {
+               if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
+                   <= bi_sector)
+                       conf->next_window_requests++;
+               else
+                       conf->current_window_requests++;
+       } else
+               conf->current_window_requests++;
+       spin_unlock(&conf->resync_lock);
+}
+
 static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
                          sector_t bi_sector)
 {

@@ -1137,12 +1140,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
                sectors_handled = (r1_bio->sector + max_sectors
                                   - bio->bi_iter.bi_sector);
                r1_bio->sectors = max_sectors;
-               spin_lock_irq(&conf->device_lock);
-               if (bio->bi_phys_segments == 0)
-                       bio->bi_phys_segments = 2;
-               else
-                       bio->bi_phys_segments++;
-               spin_unlock_irq(&conf->device_lock);
+               bio_inc_remaining(bio);

                /*
                 * Cannot call generic_make_request directly as that will be
@@ -1304,7 +1302,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
        if (unlikely(blocked_rdev)) {
                /* Wait for this device to become unblocked */
                int j;
-               sector_t old = start_next_window;

                for (j = 0; j < i; j++)
                        if (r1_bio->bios[j])
@@ -1314,15 +1311,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                raid1_log(mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
                md_wait_for_blocked_rdev(blocked_rdev, mddev);
                start_next_window = wait_barrier(conf, bio);
-               /*
-                * We must make sure the multi r1bios of bio have
-                * the same value of bi_phys_segments
-                */
-               if (bio->bi_phys_segments && old &&
-                   old != start_next_window)
-                       /* Wait for the former r1bio(s) to complete */
-                       wait_event(conf->wait_barrier,
-                                  bio->bi_phys_segments == 1);
                goto retry_write;
        }
@@ -1417,22 +1405,18 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
         * as it could result in the bio being freed.
         */
        if (sectors_handled < bio_sectors(bio)) {
-               /* We need another r1_bio, which must be accounted
-                * in bio->bi_phys_segments
-                */
-               spin_lock_irq(&conf->device_lock);
-               if (bio->bi_phys_segments == 0)
-                       bio->bi_phys_segments = 2;
-               else
-                       bio->bi_phys_segments++;
-               spin_unlock_irq(&conf->device_lock);
+               /* We need another r1_bio, which must be counted */
+               sector_t sect = bio->bi_iter.bi_sector + sectors_handled;
+
+               inc_pending(conf, start_next_window, sect);
+               bio_inc_remaining(bio);
                r1_bio_write_done(r1_bio);
                r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
                r1_bio->master_bio = bio;
                r1_bio->sectors = bio_sectors(bio) - sectors_handled;
                r1_bio->state = 0;
                r1_bio->mddev = mddev;
-               r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+               r1_bio->sector = sect;
                goto retry_write;
        }
@@ -1460,16 +1444,6 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
        r1_bio->mddev = mddev;
        r1_bio->sector = bio->bi_iter.bi_sector;

-       /*
-        * We might need to issue multiple reads to different devices if there
-        * are bad blocks around, so we keep track of the number of reads in
-        * bio->bi_phys_segments.  If this is 0, there is only one r1_bio and
-        * no locking will be needed when requests complete.  If it is
-        * non-zero, then it is the number of not-completed requests.
-        */
-       bio->bi_phys_segments = 0;
-       bio_clear_flag(bio, BIO_SEG_VALID);
-
        if (bio_data_dir(bio) == READ)
                raid1_read_request(mddev, bio, r1_bio);
        else
@@ -2434,12 +2408,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
                        int sectors_handled = (r1_bio->sector + max_sectors
                                               - mbio->bi_iter.bi_sector);
                        r1_bio->sectors = max_sectors;
-                       spin_lock_irq(&conf->device_lock);
-                       if (mbio->bi_phys_segments == 0)
-                               mbio->bi_phys_segments = 2;
-                       else
-                               mbio->bi_phys_segments++;
-                       spin_unlock_irq(&conf->device_lock);
+                       bio_inc_remaining(mbio);
                        trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
                                              bio, bio_dev, bio_sector);
                        generic_make_request(bio);


--

To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Ming Lei

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help