Thread (13 messages) 13 messages, 2 authors, 2012-06-21

Re: [patch 3/9 v2] raid5: lockless access raid5 overrided bi_phys_segments

From: Dan Williams <hidden>
Date: 2012-06-21 01:56:10

On Mon, Jun 18, 2012 at 6:59 PM, Shaohua Li [off-list ref] wrote:
quoted hunk ↗ jump to hunk
Raid5 overrides bio->bi_phys_segments, accessing it is with device_lock hold,
which is unnecessary, We can make it lockless actually.

Signed-off-by: Shaohua Li <redacted>
---
 drivers/md/raid5.c |   58 +++++++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 26 deletions(-)

Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c       2012-06-08 11:48:25.652618012 +0800
+++ linux/drivers/md/raid5.c    2012-06-08 13:15:31.458919391 +0800
@@ -99,34 +99,40 @@ static inline struct bio *r5_next_bio(st
 * We maintain a biased count of active stripes in the bottom 16 bits of
 * bi_phys_segments, and a count of processed stripes in the upper 16 bits
 */
-static inline int raid5_bi_phys_segments(struct bio *bio)
+static inline int raid5_bi_processed_stripes(struct bio *bio)
 {
-       return bio->bi_phys_segments & 0xffff;
+       atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+       return (atomic_read(segments) >> 16) & 0xffff;
 }

-static inline int raid5_bi_hw_segments(struct bio *bio)
+static inline int raid5_dec_bi_active_stripes(struct bio *bio)
 {
-       return (bio->bi_phys_segments >> 16) & 0xffff;
+       atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+       return atomic_sub_return(1, segments) & 0xffff;
 }

-static inline int raid5_dec_bi_phys_segments(struct bio *bio)
+static inline void raid5_inc_bi_active_stripes(struct bio *bio)
 {
-       --bio->bi_phys_segments;
-       return raid5_bi_phys_segments(bio);
+       atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+       atomic_inc(segments);
 }

-static inline int raid5_dec_bi_hw_segments(struct bio *bio)
+static inline void raid5_set_bi_processed_stripes(struct bio *bio,
+       unsigned int cnt)
 {
-       unsigned short val = raid5_bi_hw_segments(bio);
+       atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+       int old, new;

-       --val;
-       bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
-       return val;
+       do {
+               old = atomic_read(segments);
+               new = (old & 0xffff) | (cnt << 16);
+       } while (atomic_cmpxchg(segments, old, new) != old);
 }

-static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
+static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt)
 {
-       bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16);
+       atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+       atomic_set(segments, cnt);
 }

 /* Find first data disk in a raid6 stripe */
@@ -767,7 +773,7 @@ static void ops_complete_biofill(void *s
                       while (rbi && rbi->bi_sector <
                               dev->sector + STRIPE_SECTORS) {
                               rbi2 = r5_next_bio(rbi, dev->sector);
-                               if (!raid5_dec_bi_phys_segments(rbi)) {
+                               if (!raid5_dec_bi_active_stripes(rbi)) {
                                       rbi->bi_next = return_bi;
                                       return_bi = rbi;
                               }
@@ -2354,7 +2360,7 @@ static int add_stripe_bio(struct stripe_
       if (*bip)
               bi->bi_next = *bip;
       *bip = bi;
-       bi->bi_phys_segments++;
+       raid5_inc_bi_active_stripes(bi);
Now that device_lock does not globally sync bio ingress/egress would
it be worth having a a comment here that says this ordering of adding
bi to the to{read|write} list is safe because make_request holds a
'segments' reference count until the bio has been added to all
affected stripes, and that within a given stripe bios are still synced
via stripe_lock?
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help