Btrfs: Do snapshot deletion in smaller chunks.
authorChris Mason <chris.mason@oracle.com>
Tue, 7 Aug 2007 19:52:19 +0000 (15:52 -0400)
committerDavid Woodhouse <dwmw2@hera.kernel.org>
Tue, 7 Aug 2007 19:52:19 +0000 (15:52 -0400)
Before, snapshot deletion was a single atomic unit.  This caused considerable
lock contention and required an unbounded amount of space.  Now,
the drop_progress field in the root item is used to indicate how far along
snapshot deletion is, and to resume where it left off.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/transaction.c

index 19a30b7..aa824e2 100644 (file)
@@ -659,9 +659,16 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        struct buffer_head *b;
        struct buffer_head *cow_buf;
        struct btrfs_node *c;
+       struct btrfs_root_item *root_item = &root->root_item;
        int slot;
        int ret;
        int level;
+       u8 lowest_level = 0;
+
+       if (btrfs_root_refs(root_item) == 0 && root->ref_cows) {
+               lowest_level = root_item->drop_level;
+               WARN_ON(ins_len || cow);
+       }
 
        WARN_ON(p->nodes[0] != NULL);
        WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
@@ -718,6 +725,9 @@ again:
                                slot = p->slots[level];
                                BUG_ON(btrfs_header_nritems(&c->header) == 1);
                        }
+                       /* this is only true while dropping a snapshot */
+                       if (level == lowest_level)
+                               break;
                        b = read_tree_block(root, btrfs_node_blockptr(c, slot));
                } else {
                        struct btrfs_leaf *l = (struct btrfs_leaf *)c;
index 0287bd5..73c2e75 100644 (file)
@@ -333,10 +333,10 @@ struct btrfs_root {
        u64 objectid;
        u64 last_trans;
        u32 blocksize;
-       int ref_cows;
        u32 type;
        u64 highest_inode;
        u64 last_inode_alloc;
+       int ref_cows;
 };
 
 /* the lower bits in the key flags defines the item type */
@@ -1073,7 +1073,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf);
 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
-                       *root, struct buffer_head *snap);
+                       *root);
 /* root-item.c */
 int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_key *key);
index 5ace2c3..9455974 100644 (file)
@@ -1561,12 +1561,21 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
        int i;
        int slot;
        int ret;
+       struct btrfs_root_item *root_item = &root->root_item;
+
        for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
                slot = path->slots[i];
                if (slot < btrfs_header_nritems(
                    btrfs_buffer_header(path->nodes[i])) - 1) {
+                       struct btrfs_node *node;
+                       node = btrfs_buffer_node(path->nodes[i]);
                        path->slots[i]++;
                        *level = i;
+                       WARN_ON(*level == 0);
+                       memcpy(&root_item->drop_progress,
+                              &node->ptrs[path->slots[i]].key,
+                              sizeof(root_item->drop_progress));
+                       root_item->drop_level = i;
                        return 0;
                } else {
                        ret = btrfs_free_extent(trans, root,
@@ -1587,7 +1596,7 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
  * decremented.
  */
 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
-                       *root, struct buffer_head *snap)
+                       *root)
 {
        int ret = 0;
        int wret;
@@ -1595,14 +1604,33 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
        struct btrfs_path *path;
        int i;
        int orig_level;
+       int num_walks = 0;
+       struct btrfs_root_item *root_item = &root->root_item;
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
-       level = btrfs_header_level(btrfs_buffer_header(snap));
+       level = btrfs_header_level(btrfs_buffer_header(root->node));
        orig_level = level;
-       path->nodes[level] = snap;
-       path->slots[level] = 0;
+       if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+               path->nodes[level] = root->node;
+               path->slots[level] = 0;
+       } else {
+               struct btrfs_key key;
+               struct btrfs_disk_key *found_key;
+               struct btrfs_node *node;
+               btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+               wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+               if (ret < 0) {
+                       ret = wret;
+                       goto out;
+               }
+               level = root_item->drop_level;
+               node = btrfs_buffer_node(path->nodes[level]);
+               found_key = &node->ptrs[path->slots[level]].key;
+               WARN_ON(memcmp(found_key, &root_item->drop_progress,
+                              sizeof(*found_key)));
+       }
        while(1) {
                wret = walk_down_tree(trans, root, path, &level);
                if (wret > 0)
@@ -1615,12 +1643,21 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
                        break;
                if (wret < 0)
                        ret = wret;
+               num_walks++;
+               if (num_walks > 10) {
+                       struct btrfs_key key;
+                       btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+                       ret = -EAGAIN;
+                       get_bh(root->node);
+                       break;
+               }
        }
        for (i = 0; i <= orig_level; i++) {
                if (path->nodes[i]) {
                        btrfs_block_release(root, path->nodes[i]);
                }
        }
+out:
        btrfs_free_path(path);
        return ret;
 }
index 0f494fe..4986264 100644 (file)
@@ -213,10 +213,7 @@ static int wait_for_commit(struct btrfs_root *root,
 
 struct dirty_root {
        struct list_head list;
-       struct btrfs_key snap_key;
-       struct buffer_head *commit_root;
        struct btrfs_root *root;
-       int free_on_drop;
 };
 
 int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list)
@@ -226,10 +223,7 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list)
        dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
        if (!dirty)
                return -ENOMEM;
-       memcpy(&dirty->snap_key, &root->root_key, sizeof(root->root_key));
-       dirty->commit_root = root->node;
        dirty->root = root;
-       dirty->free_on_drop = 1;
        list_add(&dirty->list, dead_list);
        return 0;
 }
@@ -241,7 +235,6 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans,
        struct dirty_root *dirty;
        struct btrfs_root *gang[8];
        struct btrfs_root *root;
-       struct btrfs_root_item tmp_item;
        int i;
        int ret;
        int err = 0;
@@ -267,13 +260,16 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans,
                        }
                        dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
                        BUG_ON(!dirty);
-                       memcpy(&dirty->snap_key, &root->root_key,
-                              sizeof(root->root_key));
-                       dirty->commit_root = root->commit_root;
+                       dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
+                       BUG_ON(!dirty->root);
+
+                       memset(&root->root_item.drop_progress, 0,
+                              sizeof(struct btrfs_disk_key));
+                       root->root_item.drop_level = 0;
+
+                       memcpy(dirty->root, root, sizeof(*root));
+                       dirty->root->node = root->commit_root;
                        root->commit_root = NULL;
-                       dirty->root = root;
-                       dirty->free_on_drop = 0;
-                       memcpy(&tmp_item, &root->root_item, sizeof(tmp_item));
 
                        root->root_key.offset = root->fs_info->generation;
                        btrfs_set_root_blocknr(&root->root_item,
@@ -283,17 +279,21 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans,
                                                &root->root_item);
                        if (err)
                                break;
-                       refs = btrfs_root_refs(&tmp_item);
-                       btrfs_set_root_refs(&tmp_item, refs - 1);
+
+                       refs = btrfs_root_refs(&dirty->root->root_item);
+                       btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
                        err = btrfs_update_root(trans, root->fs_info->tree_root,
-                                               &dirty->snap_key,
-                                               &tmp_item);
+                                               &dirty->root->root_key,
+                                               &dirty->root->root_item);
 
                        BUG_ON(err);
-                       if (refs == 1)
+                       if (refs == 1) {
                                list_add(&dirty->list, list);
-                       else
+                       } else {
+                               WARN_ON(1);
+                               kfree(dirty->root);
                                kfree(dirty);
+                       }
                }
        }
        return err;
@@ -305,23 +305,36 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
        struct dirty_root *dirty;
        struct btrfs_trans_handle *trans;
        int ret = 0;
+       int err;
+
        while(!list_empty(list)) {
                mutex_lock(&tree_root->fs_info->fs_mutex);
                dirty = list_entry(list->next, struct dirty_root, list);
                list_del_init(&dirty->list);
 
-               trans = btrfs_start_transaction(tree_root, 1);
-               ret = btrfs_drop_snapshot(trans, dirty->root,
-                                         dirty->commit_root);
+               while(1) {
+                       trans = btrfs_start_transaction(tree_root, 1);
+                       ret = btrfs_drop_snapshot(trans, dirty->root);
+                       if (ret != -EAGAIN) {
+                               break;
+                       }
+                       err = btrfs_update_root(trans,
+                                       tree_root,
+                                       &dirty->root->root_key,
+                                       &dirty->root->root_item);
+                       if (err)
+                               ret = err;
+                       ret = btrfs_end_transaction(trans, tree_root);
+                       BUG_ON(ret);
+               }
                BUG_ON(ret);
-               ret = btrfs_del_root(trans, tree_root, &dirty->snap_key);
+               ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
                if (ret)
                        break;
                ret = btrfs_end_transaction(trans, tree_root);
                BUG_ON(ret);
 
-               if (dirty->free_on_drop)
-                       kfree(dirty->root);
+               kfree(dirty->root);
                kfree(dirty);
                mutex_unlock(&tree_root->fs_info->fs_mutex);
                btrfs_btree_balance_dirty(tree_root);