Re: WARNING in 2.6.25-07422-gb66e1f1
From: Dan Williams <hidden>
Date: 2008-05-12 17:46:56
Also in:
lkml
Subsystem:
block layer, software raid (multiple disks) support, the rest · Maintainers:
Jens Axboe, Song Liu, Yu Kuai, Linus Torvalds
Possibly related (same subject, not in this thread)
- 2008-05-13 · Re: WARNING in 2.6.25-07422-gb66e1f1 · Neil Brown <hidden>
- 2008-05-09 · Re: WARNING in 2.6.25-07422-gb66e1f1 · Dan Williams <hidden>
- 2008-05-09 · Re: WARNING in 2.6.25-07422-gb66e1f1 · Neil Brown <hidden>
- 2008-05-08 · Re: WARNING in 2.6.25-07422-gb66e1f1 · Dan Williams <hidden>
- 2008-05-08 · Re: WARNING in 2.6.25-07422-gb66e1f1 · Dan Williams <hidden>
On Thu, 2008-05-08 at 22:38 -0700, Neil Brown wrote:
quoted hunk ↗ jump to hunk
On Friday May 9, neilb@suse.de wrote:quoted
On Thursday May 8, dan.j.williams@intel.com wrote:quoted
@@ -133,8 +137,10 @@ static linear_conf_t *linear_conf(mddev_t*mddev, int raid_disks)quoted
quoted
disk->rdev = rdev; + spin_lock(&conf->device_lock); blk_queue_stack_limits(mddev->queue, rdev->bdev->bd_disk->queue); + spin_unlock(&conf->device_lock); /* as we don't honour merge_bvec_fn, we must neverriskquoted
quoted
* violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation.This shouldn't be necessary. There is no actual race here -- mddev->queue->queue_flags is notgoing to bequoted
accessed by anyone else until do_md_run does mddev->queue->make_request_fn = mddev->pers->make_request; which is much later. So we only need to be sure that "queue_is_locked" doesn't complain. And as q->queue_lock is still NULL at this point, it won't complain.Sorry, I got that backwards. It will complain, won't it. :-) I gotta say that I think it shouldn't. Introducing a spinlock in linear.c, raid0.c, multipath.c just to silence a "WARN_ON" seems like the wrong thing to do. Of course we could just use q->__queue_lock so we don't have to add a new lock, but we still have to take the lock unnecessarily. Unfortunately I cannot find a nice solution that both avoids clutter in md code and also protects against carelessly changing flags without a proper lock..... Maybe.... We could get blk_queue_stack_limits to lock the queue, and always spin_lock_init __queue_lock. Then the only change needed in linear.c et al would be to set ->queue_lock to &->__queue_lock. Jens: What do you think of this??diff --git a/block/blk-core.c b/block/blk-core.c index b754a4a..2d31dc2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c@@ -479,6 +479,7 @@ struct request_queue *blk_alloc_queue_node(gfp_tgfp_mask, int node_id) kobject_init(&q->kobj, &blk_queue_ktype); mutex_init(&q->sysfs_lock); + spin_lock_init(&q->__queue_lock); return q; }@@ -541,10 +542,8 @@ blk_init_queue_node(request_fn_proc *rfn,spinlock_t *lock, int node_id) * if caller didn't supply a lock, they get per-queue locking with * our embedded lock */ - if (!lock) { - spin_lock_init(&q->__queue_lock); + if (!lock) lock = &q->__queue_lock; - } q->request_fn = rfn; q->prep_rq_fn = NULL;diff --git a/block/blk-settings.c b/block/blk-settings.c index bb93d4c..488199a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c@@ -286,8 +286,14 @@ void blk_queue_stack_limits(struct request_queue*t, struct request_queue *b) t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments); t->max_segment_size = min(t->max_segment_size, b->max_segment_size); t->hardsect_size = max(t->hardsect_size, b->hardsect_size); - if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) + if (!t->queue_lock) + WARN_ON_ONCE(1); + else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { + unsigned long flags; + spin_lock_irqsave(&t->queue_lock, flags); queue_flag_clear(QUEUE_FLAG_CLUSTER, t); + spin_unlock_irqrestore(&t->queue_lock, flags); + } } EXPORT_SYMBOL(blk_queue_stack_limits);diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 0b85117..552f81b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c@@ -250,6 +250,7 @@ static int linear_run (mddev_t *mddev) { linear_conf_t *conf; + mddev->queue_lock = &mddev->__queue_lock; conf = linear_conf(mddev, mddev->raid_disks); if (!conf)diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 42ee1a2..90f85e4 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c@@ -417,6 +417,7 @@ static int multipath_run (mddev_t *mddev) * bookkeeping area. [whatever we allocate in multipath_run(), * should be freed in multipath_stop()] */ + mddev->queue_lock = &mddev->__queue_lock; conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL); mddev->private = conf;diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 818b482..a179c8f 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c@@ -280,6 +280,7 @@ static int raid0_run (mddev_t *mddev) (mddev->chunk_size>>1)-1); blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); blk_queue_segment_boundary(mddev->queue,(mddev->chunk_size>>1) - 1); + mddev->queue_lock = &mddev->__queue_lock; conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL); if (!conf)diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6778b7c..ac409b7 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c@@ -1935,6 +1935,9 @@ static int run(mddev_t *mddev) if (!conf->r1bio_pool) goto out_no_mem; + spin_lock_init(&conf->device_lock); + mddev->queue->queue_lock = &conf->device_lock; + rdev_for_each(rdev, tmp, mddev) { disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks@@ -1958,7 +1961,6 @@ static int run(mddev_t *mddev) } conf->raid_disks = mddev->raid_disks; conf->mddev = mddev; - spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); spin_lock_init(&conf->resync_lock);diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5938fa9..740f670 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c@@ -2082,6 +2082,9 @@ static int run(mddev_t *mddev) goto out_free_conf; } + spin_lock_init(&conf->device_lock); + mddev->queue->queue_lock = &mddev->queue->__queue_lock; + rdev_for_each(rdev, tmp, mddev) { disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks@@ -2103,7 +2106,6 @@ static int run(mddev_t *mddev) disk->head_position = 0; } - spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); spin_lock_init(&conf->resync_lock);diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 087eee0..4fafc79 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c@@ -4256,6 +4256,7 @@ static int run(mddev_t *mddev) goto abort; } spin_lock_init(&conf->device_lock); + mddev->queue->queue_lock = &conf->device_lock; init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list);
Yes, this is simpler than what I had... spotted some fixups. -- Dan
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 488199a..8dd8641 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c@@ -290,9 +290,9 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) WARN_ON_ONCE(1); else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { unsigned long flags; - spin_lock_irqsave(&t->queue_lock, flags); + spin_lock_irqsave(t->queue_lock, flags); queue_flag_clear(QUEUE_FLAG_CLUSTER, t); - spin_unlock_irqrestore(&t->queue_lock, flags); + spin_unlock_irqrestore(t->queue_lock, flags); } } EXPORT_SYMBOL(blk_queue_stack_limits);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 552f81b..1074824 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c@@ -250,7 +250,7 @@ static int linear_run (mddev_t *mddev) { linear_conf_t *conf; - mddev->queue_lock = &mddev->__queue_lock; + mddev->queue->queue_lock = &mddev->queue->__queue_lock; conf = linear_conf(mddev, mddev->raid_disks); if (!conf)
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 90f85e4..4f4d1f3 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c@@ -417,7 +417,7 @@ static int multipath_run (mddev_t *mddev) * bookkeeping area. [whatever we allocate in multipath_run(), * should be freed in multipath_stop()] */ - mddev->queue_lock = &mddev->__queue_lock; + mddev->queue->queue_lock = &mddev->queue->__queue_lock; conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL); mddev->private = conf;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a179c8f..914c04d 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c@@ -280,7 +280,7 @@ static int raid0_run (mddev_t *mddev) (mddev->chunk_size>>1)-1); blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1); - mddev->queue_lock = &mddev->__queue_lock; + mddev->queue->queue_lock = &mddev->queue->__queue_lock; conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL); if (!conf)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f46d448..8536ede 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c@@ -2083,7 +2083,7 @@ static int run(mddev_t *mddev) } spin_lock_init(&conf->device_lock); - mddev->queue->queue_lock = &mddev->queue->__queue_lock; + mddev->queue->queue_lock = &conf->device_lock; rdev_for_each(rdev, tmp, mddev) { disk_idx = rdev->raid_disk;