Re: WARNING in 2.6.25-07422-gb66e1f1

From: Dan Williams <hidden>
Date: 2008-05-12 17:46:56
Also in: lkml
Subsystem: block layer, software raid (multiple disks) support, the rest · Maintainers: Jens Axboe, Song Liu, Yu Kuai, Linus Torvalds

Possibly related (same subject, not in this thread)

2008-05-13 · Re: WARNING in 2.6.25-07422-gb66e1f1 · Neil Brown <hidden>
2008-05-09 · Re: WARNING in 2.6.25-07422-gb66e1f1 · Dan Williams <hidden>
2008-05-09 · Re: WARNING in 2.6.25-07422-gb66e1f1 · Neil Brown <hidden>
2008-05-08 · Re: WARNING in 2.6.25-07422-gb66e1f1 · Dan Williams <hidden>
2008-05-08 · Re: WARNING in 2.6.25-07422-gb66e1f1 · Dan Williams <hidden>

On Thu, 2008-05-08 at 22:38 -0700, Neil Brown wrote:

quoted hunk ↗ jump to hunk

On Friday May 9, neilb@suse.de wrote:

quoted

On Thursday May 8, dan.j.williams@intel.com wrote:

quoted

@@ -133,8 +137,10 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)

quoted

            disk->rdev = rdev;

+           spin_lock(&conf->device_lock);
            blk_queue_stack_limits(mddev->queue,
                                   rdev->bdev->bd_disk->queue);
+           spin_unlock(&conf->device_lock);
            /* as we don't honour merge_bvec_fn, we must never risk
             * violating it, so limit ->max_sector to one PAGE, as
             * a one page request is never in violation.

This shouldn't be necessary.
There is no actual race here -- mddev->queue->queue_flags is not going to be
accessed by anyone else until do_md_run does
      mddev->queue->make_request_fn = mddev->pers->make_request;
which is much later.
So we only need to be sure that "queue_is_locked" doesn't complain.
And as q->queue_lock is still NULL at this point, it won't complain.

Sorry, I got that backwards.  It will complain, won't it. :-)

I gotta say that I think it shouldn't.  Introducing a spinlock in
linear.c, raid0.c, multipath.c just to silence a "WARN_ON" seems like
the wrong thing to do.  Of course we could just use q->__queue_lock so
we don't have to add a new lock, but we still have to take the lock
unnecessarily.

Unfortunately I cannot find a nice solution that both avoids clutter
in md code and also protects against carelessly changing flags without
a proper lock.....

Maybe....
We could get blk_queue_stack_limits to lock the queue, and always
spin_lock_init __queue_lock.  Then the only change needed in linear.c
et al would be to set ->queue_lock to &->__queue_lock.

Jens:  What do you think of this??

diff --git a/block/blk-core.c b/block/blk-core.c
index b754a4a..2d31dc2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c

@@ -479,6 +479,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        kobject_init(&q->kobj, &blk_queue_ktype);

        mutex_init(&q->sysfs_lock);
+       spin_lock_init(&q->__queue_lock);

        return q;
 }

@@ -541,10 +542,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
         * if caller didn't supply a lock, they get per-queue locking with
         * our embedded lock
         */
-       if (!lock) {
-               spin_lock_init(&q->__queue_lock);
+       if (!lock)
                lock = &q->__queue_lock;
-       }

        q->request_fn           = rfn;
        q->prep_rq_fn           = NULL;

diff --git a/block/blk-settings.c b/block/blk-settings.c
index bb93d4c..488199a 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c

@@ -286,8 +286,14 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
        t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments);
        t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
        t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
-       if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
+       if (!t->queue_lock)
+               WARN_ON_ONCE(1);
+       else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
+               unsigned long flags;
+               spin_lock_irqsave(&t->queue_lock, flags);
                queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
+               spin_unlock_irqrestore(&t->queue_lock, flags);
+       }
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 0b85117..552f81b 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c

@@ -250,6 +250,7 @@ static int linear_run (mddev_t *mddev)
 {
        linear_conf_t *conf;

+       mddev->queue_lock = &mddev->__queue_lock;
        conf = linear_conf(mddev, mddev->raid_disks);

        if (!conf)

diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 42ee1a2..90f85e4 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c

@@ -417,6 +417,7 @@ static int multipath_run (mddev_t *mddev)
         * bookkeeping area. [whatever we allocate in multipath_run(),
         * should be freed in multipath_stop()]
         */
+       mddev->queue_lock = &mddev->__queue_lock;

        conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
        mddev->private = conf;

diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 818b482..a179c8f 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c

@@ -280,6 +280,7 @@ static int raid0_run (mddev_t *mddev)
               (mddev->chunk_size>>1)-1);
        blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9);
        blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1);
+       mddev->queue_lock = &mddev->__queue_lock;

        conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL);
        if (!conf)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6778b7c..ac409b7 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c

@@ -1935,6 +1935,9 @@ static int run(mddev_t *mddev)
        if (!conf->r1bio_pool)
                goto out_no_mem;

+       spin_lock_init(&conf->device_lock);
+       mddev->queue->queue_lock = &conf->device_lock;
+
        rdev_for_each(rdev, tmp, mddev) {
                disk_idx = rdev->raid_disk;
                if (disk_idx >= mddev->raid_disks

@@ -1958,7 +1961,6 @@ static int run(mddev_t *mddev)
        }
        conf->raid_disks = mddev->raid_disks;
        conf->mddev = mddev;
-       spin_lock_init(&conf->device_lock);
        INIT_LIST_HEAD(&conf->retry_list);

        spin_lock_init(&conf->resync_lock);

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 5938fa9..740f670 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c

@@ -2082,6 +2082,9 @@ static int run(mddev_t *mddev)
                goto out_free_conf;
        }

+       spin_lock_init(&conf->device_lock);
+       mddev->queue->queue_lock = &mddev->queue->__queue_lock;
+
        rdev_for_each(rdev, tmp, mddev) {
                disk_idx = rdev->raid_disk;
                if (disk_idx >= mddev->raid_disks

@@ -2103,7 +2106,6 @@ static int run(mddev_t *mddev)

                disk->head_position = 0;
        }
-       spin_lock_init(&conf->device_lock);
        INIT_LIST_HEAD(&conf->retry_list);

        spin_lock_init(&conf->resync_lock);

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 087eee0..4fafc79 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c

@@ -4256,6 +4256,7 @@ static int run(mddev_t *mddev)
                        goto abort;
        }
        spin_lock_init(&conf->device_lock);
+       mddev->queue->queue_lock = &conf->device_lock;
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
        INIT_LIST_HEAD(&conf->handle_list);

Yes, this is simpler than what I had... spotted some fixups.

--
Dan

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 488199a..8dd8641 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c

@@ -290,9 +290,9 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 		WARN_ON_ONCE(1);
 	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
 		unsigned long flags;
-		spin_lock_irqsave(&t->queue_lock, flags);
+		spin_lock_irqsave(t->queue_lock, flags);
 		queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-		spin_unlock_irqrestore(&t->queue_lock, flags);
+		spin_unlock_irqrestore(t->queue_lock, flags);
 	}
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 552f81b..1074824 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c

@@ -250,7 +250,7 @@ static int linear_run (mddev_t *mddev)
 {
 	linear_conf_t *conf;
 
-	mddev->queue_lock = &mddev->__queue_lock;
+	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
 	conf = linear_conf(mddev, mddev->raid_disks);
 
 	if (!conf)

diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 90f85e4..4f4d1f3 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c

@@ -417,7 +417,7 @@ static int multipath_run (mddev_t *mddev)
 	 * bookkeeping area. [whatever we allocate in multipath_run(),
 	 * should be freed in multipath_stop()]
 	 */
-	mddev->queue_lock = &mddev->__queue_lock;
+	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
 
 	conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
 	mddev->private = conf;

diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a179c8f..914c04d 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c

@@ -280,7 +280,7 @@ static int raid0_run (mddev_t *mddev)
 	       (mddev->chunk_size>>1)-1);
 	blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9);
 	blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1);
-	mddev->queue_lock = &mddev->__queue_lock;
+	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
 
 	conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL);
 	if (!conf)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f46d448..8536ede 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c

@@ -2083,7 +2083,7 @@ static int run(mddev_t *mddev)
 	}
 
 	spin_lock_init(&conf->device_lock);
-	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
+	mddev->queue->queue_lock = &conf->device_lock;
 
 	rdev_for_each(rdev, tmp, mddev) {
 		disk_idx = rdev->raid_disk;

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help