[PATCH 2/5] Btrfs: fix trans block rsv regression
From: Liu Bo <hidden>
Date: 2012-09-14 09:00:00
Subsystem:
btrfs file system, filesystems (vfs and infrastructure), the rest · Maintainers:
Chris Mason, David Sterba, Alexander Viro, Christian Brauner, Linus Torvalds
In some workloads we have nested join-transaction operations,
e.g.:
run_delalloc_nocow
btrfs_join_transaction
cow_file_range
btrfs_join_transaction
This can be a serious bug, since each trans handle has only two
block_rsv pointers, orig_rsv and block_rsv, which means we may lose our
first block_rsv after two join-transaction operations:
1) btrfs_start_transaction
trans->block_rsv = A
2) btrfs_join_transaction
trans->orig_rsv = trans->block_rsv; ---> orig_rsv is now A
trans->block_rsv = B
3) btrfs_join_transaction
trans->orig_rsv = trans->block_rsv; ---> orig_rsv is now B
trans->block_rsv = C
...
This patch uses a list of block_rsv instead, so that we can either
a) PUSH the old one onto the list and use a new one when joining, or
b) POP the old one when ending this transaction.
Signed-off-by: Liu Bo <redacted>
---
fs/btrfs/transaction.c | 25 +++++++++++++++++++++----
fs/btrfs/transaction.h | 7 ++++++-
2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0c17d9e..a36ae05 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c@@ -306,9 +306,17 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK && type != TRANS_JOIN_ONLY); h = current->journal_info; - h->use_count++; - h->orig_rsv = h->block_rsv; + if (h->block_rsv) { + struct btrfs_trans_rsv_item *item; + item = kmalloc(sizeof(*item), GFP_NOFS); + if (!item) + return ERR_PTR(-ENOMEM); + item->rsv = h->block_rsv; + INIT_LIST_HEAD(&item->list); + list_add(&item->list, &h->blk_rsv_list); + } h->block_rsv = NULL; + h->use_count++; goto got_it; } else if (type == TRANS_JOIN_ONLY) { return ERR_PTR(-ENOENT);
@@ -367,11 +375,11 @@ again: h->use_count = 1; h->adding_csums = 0; h->block_rsv = NULL; - h->orig_rsv = NULL; h->aborted = 0; h->qgroup_reserved = qgroup_reserved; h->delayed_ref_elem.seq = 0; INIT_LIST_HEAD(&h->qgroup_ref_list); + INIT_LIST_HEAD(&h->blk_rsv_list); smp_mb(); if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -523,7 +531,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, int err = 0; if (--trans->use_count) { - trans->block_rsv = trans->orig_rsv; + trans->block_rsv = NULL; + if (!list_empty(&trans->blk_rsv_list)) { + struct btrfs_trans_rsv_item *item; + item = list_entry(trans->blk_rsv_list.next, + struct btrfs_trans_rsv_item, list); + list_del_init(&item->list); + trans->block_rsv = item->rsv; + kfree(item); + } return 0; }
@@ -558,6 +574,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, count++; } btrfs_trans_release_metadata(trans, root); + BUG_ON(!list_empty(&trans->blk_rsv_list)); trans->block_rsv = NULL; sb_end_intwrite(root->fs_info->sb);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 59adf55..7fa11b7 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h@@ -57,7 +57,6 @@ struct btrfs_trans_handle { unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; - struct btrfs_block_rsv *orig_rsv; int aborted; int adding_csums; /*
@@ -68,6 +67,12 @@ struct btrfs_trans_handle { struct btrfs_root *root; struct seq_list delayed_ref_elem; struct list_head qgroup_ref_list; + struct list_head blk_rsv_list; +}; + +struct btrfs_trans_rsv_item { + struct btrfs_block_rsv *rsv; + struct list_head list; }; struct btrfs_pending_snapshot {
--
1.7.7.6