[PATCH 1/4] Btrfs: use radix tree for checksum
From: Liu Bo <hidden>
Date: 2012-06-13 10:11:59
Subsystem:
btrfs file system, filesystems (vfs and infrastructure), the rest · Maintainers:
Chris Mason, David Sterba, Alexander Viro, Christian Brauner, Linus Torvalds
We used to store the checksum for read endio in the extent state covering a 4K range, but we now want extent states to cover larger ranges as a performance optimization. So instead we create a radix tree for checksums, where each item holds the checksum of 4K of data. Signed-off-by: Liu Bo <redacted> --- fs/btrfs/extent_io.c | 84 ++++++++++++-------------------------------------- fs/btrfs/extent_io.h | 2 + fs/btrfs/inode.c | 7 +--- 3 files changed, 23 insertions(+), 70 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2c8f7b2..2923ede 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c@@ -117,10 +117,12 @@ void extent_io_tree_init(struct extent_io_tree *tree, { tree->state = RB_ROOT; INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); + INIT_RADIX_TREE(&tree->csum, GFP_ATOMIC); tree->ops = NULL; tree->dirty_bytes = 0; spin_lock_init(&tree->lock); spin_lock_init(&tree->buffer_lock); + spin_lock_init(&tree->csum_lock); tree->mapping = mapping; }
@@ -703,15 +705,6 @@ static void cache_state(struct extent_state *state, } } -static void uncache_state(struct extent_state **cached_ptr) -{ - if (cached_ptr && (*cached_ptr)) { - struct extent_state *state = *cached_ptr; - *cached_ptr = NULL; - free_extent_state(state); - } -} - /* * set some bits on a range in the tree. This may require allocations or * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -1666,56 +1659,32 @@ out: */ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) { - struct rb_node *node; - struct extent_state *state; int ret = 0; - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(tree, start); - if (!node) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - state->private = private; -out: - spin_unlock(&tree->lock); + spin_lock(&tree->csum_lock); + ret = radix_tree_insert(&tree->csum, (unsigned long)start, + (void *)((unsigned long)private << 1)); + BUG_ON(ret); + spin_unlock(&tree->csum_lock); return ret; } int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) { - struct rb_node *node; - struct extent_state *state; - int ret = 0; + void **slot = NULL; - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(tree, start); - if (!node) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; + spin_lock(&tree->csum_lock); + slot = radix_tree_lookup_slot(&tree->csum, (unsigned long)start); + if (!slot) { + spin_unlock(&tree->csum_lock); + return -ENOENT; } - *private = state->private; -out: - spin_unlock(&tree->lock); - return ret; + *private = (u64)(*slot) >> 1; + + radix_tree_delete(&tree->csum, (unsigned long)start); + spin_unlock(&tree->csum_lock); + + return 0; } /*
@@ -2294,7 +2263,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) do { struct page *page = bvec->bv_page; struct extent_state *cached = NULL; - struct extent_state *state; pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, " "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err,
@@ -2313,21 +2281,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); - if (state && state->start == start) { - /* - * take a reference on the state, unlock will drop - * the ref - */ - cache_state(state, &cached); - } - spin_unlock(&tree->lock); - mirror = (int)(unsigned long)bio->bi_bdev; if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, - state, mirror); + NULL, mirror); if (ret) { /* no IO indicated but software detected errors * in the block, either checksum errors or
@@ -2369,7 +2326,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; - uncache_state(&cached); continue; } }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 25900af..c896962 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h@@ -96,11 +96,13 @@ struct extent_io_ops { struct extent_io_tree { struct rb_root state; struct radix_tree_root buffer; + struct radix_tree_root csum; struct address_space *mapping; u64 dirty_bytes; int track_uptodate; spinlock_t lock; spinlock_t buffer_lock; + spinlock_t csum_lock; struct extent_io_ops *ops; };
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f6ab6f5..da0da44 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c@@ -2008,12 +2008,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, return 0; } - if (state && state->start == start) { - private = state->private; - ret = 0; - } else { - ret = get_state_private(io_tree, start, &private); - } + ret = get_state_private(io_tree, start, &private); kaddr = kmap_atomic(page); if (ret) goto zeroit;
--
1.6.5.2