Btrfs: Improve and cleanup locking done by walk_down_tree
author Chris Mason <chris.mason@oracle.com>
Fri, 1 Aug 2008 15:27:23 +0000 (11:27 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:04:06 +0000 (11:04 -0400)
While dropping snapshots, walk_down_tree does most of the work of checking
reference counts and limiting tree traversal to just the blocks that
we are freeing.

It dropped and held the allocation mutex in strange and confusing ways.
This commit changes it to only hold the mutex while actually freeing a block.

The rest of the checks around reference counts should be safe without the lock
because we only allow one process in btrfs_drop_snapshot at a time.  Other
processes dropping reference counts should not drop it to 1 because
their tree roots already have an extra ref on the block.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/ioctl.c

index b003b43..58bceee 100644 (file)
@@ -2333,8 +2333,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
        leaf_owner = btrfs_header_owner(leaf);
        leaf_generation = btrfs_header_generation(leaf);
 
-       mutex_unlock(&root->fs_info->alloc_mutex);
-
        for (i = 0; i < nritems; i++) {
                u64 disk_bytenr;
                cond_resched();
@@ -2362,8 +2360,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
                mutex_unlock(&root->fs_info->alloc_mutex);
                BUG_ON(ret);
        }
-
-       mutex_lock(&root->fs_info->alloc_mutex);
        return 0;
 }
 
@@ -2375,7 +2371,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
        int ret;
        struct btrfs_extent_info *info = ref->extents;
 
-       mutex_unlock(&root->fs_info->alloc_mutex);
        for (i = 0; i < ref->nritems; i++) {
                mutex_lock(&root->fs_info->alloc_mutex);
                ret = __btrfs_free_extent(trans, root,
@@ -2386,7 +2381,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
                BUG_ON(ret);
                info++;
        }
-       mutex_lock(&root->fs_info->alloc_mutex);
 
        return 0;
 }
@@ -2440,10 +2434,39 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
                              u32 *refs)
 {
        int ret;
-       mutex_unlock(&root->fs_info->alloc_mutex);
+
        ret = lookup_extent_ref(NULL, root, start, len, refs);
+       BUG_ON(ret);
+
+#if 0 // some debugging code in case we see problems here
+       /* if the refs count is one, it won't get increased again.  But
+        * if the ref count is > 1, someone may be decreasing it at
+        * the same time we are.
+        */
+       if (*refs != 1) {
+               struct extent_buffer *eb = NULL;
+               eb = btrfs_find_create_tree_block(root, start, len);
+               if (eb)
+                       btrfs_tree_lock(eb);
+
+               mutex_lock(&root->fs_info->alloc_mutex);
+               ret = lookup_extent_ref(NULL, root, start, len, refs);
+               BUG_ON(ret);
+               mutex_unlock(&root->fs_info->alloc_mutex);
+
+               if (eb) {
+                       btrfs_tree_unlock(eb);
+                       free_extent_buffer(eb);
+               }
+               if (*refs == 1) {
+                       printk("block %llu went down to one during drop_snap\n",
+                              (unsigned long long)start);
+               }
+
+       }
+#endif
+
        cond_resched();
-       mutex_lock(&root->fs_info->alloc_mutex);
        return ret;
 }
 
@@ -2467,8 +2490,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
        int ret;
        u32 refs;
 
-       mutex_lock(&root->fs_info->alloc_mutex);
-
        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);
        ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
@@ -2507,13 +2528,21 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                        root_owner = btrfs_header_owner(parent);
                        root_gen = btrfs_header_generation(parent);
                        path->slots[*level]++;
+
+                       mutex_lock(&root->fs_info->alloc_mutex);
                        ret = __btrfs_free_extent(trans, root, bytenr,
                                                blocksize, root_owner,
                                                root_gen, 0, 0, 1);
                        BUG_ON(ret);
+                       mutex_unlock(&root->fs_info->alloc_mutex);
                        continue;
                }
-
+               /*
+                * at this point, we have a single ref, and since the
+                * only place referencing this extent is a dead root
+                * the reference count should never go higher.
+                * So, we don't need to check it again
+                */
                if (*level == 1) {
                        struct btrfs_key key;
                        btrfs_node_key_to_cpu(cur, &key, path->slots[*level]);
@@ -2533,33 +2562,23 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                next = btrfs_find_tree_block(root, bytenr, blocksize);
                if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
                        free_extent_buffer(next);
-                       mutex_unlock(&root->fs_info->alloc_mutex);
 
                        if (path->slots[*level] == 0)
                                reada_walk_down(root, cur, path->slots[*level]);
                        next = read_tree_block(root, bytenr, blocksize,
                                               ptr_gen);
                        cond_resched();
-                       mutex_lock(&root->fs_info->alloc_mutex);
-
-                       /* we've dropped the lock, double check */
+#if 0
+                       /*
+                        * this is a debugging check and can go away
+                        * the ref should never go all the way down to 1
+                        * at this point
+                        */
                        ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
                                                &refs);
                        BUG_ON(ret);
-                       if (refs != 1) {
-                               parent = path->nodes[*level];
-                               root_owner = btrfs_header_owner(parent);
-                               root_gen = btrfs_header_generation(parent);
-
-                               path->slots[*level]++;
-                               free_extent_buffer(next);
-                               ret = __btrfs_free_extent(trans, root, bytenr,
-                                                       blocksize,
-                                                       root_owner,
-                                                       root_gen, 0, 0, 1);
-                               BUG_ON(ret);
-                               continue;
-                       }
+                       WARN_ON(refs != 1);
+#endif
                }
                WARN_ON(*level <= 0);
                if (path->nodes[*level-1])
@@ -2584,6 +2603,8 @@ out:
        root_owner = btrfs_header_owner(parent);
        root_gen = btrfs_header_generation(parent);
 
+
+       mutex_lock(&root->fs_info->alloc_mutex);
        ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
                                  root_owner, root_gen, 0, 0, 1);
        free_extent_buffer(path->nodes[*level]);
@@ -2591,6 +2612,7 @@ out:
        *level += 1;
        BUG_ON(ret);
        mutex_unlock(&root->fs_info->alloc_mutex);
+
        cond_resched();
        return 0;
 }
@@ -2834,6 +2856,11 @@ again:
                }
                set_page_extent_mapped(page);
 
+               /*
+                * make sure page_mkwrite is called for this page if userland
+                * wants to change it from mmap
+                */
+               clear_page_dirty_for_io(page);
 
                set_extent_delalloc(io_tree, page_start,
                                    page_end, GFP_NOFS);
index c78f184..8915f2d 100644 (file)
@@ -338,6 +338,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
                btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1);
                BUG_ON(err);
                mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+
+               /*
+                * an ugly way to do all the prop accounting around
+                * the page bits and mapping tags
+                */
+               set_page_writeback(pages[0]);
+               end_page_writeback(pages[0]);
                did_inline = 1;
        }
        if (end_pos > isize) {
@@ -833,11 +840,7 @@ again:
                              start_pos, last_pos - 1, GFP_NOFS);
        }
        for (i = 0; i < num_pages; i++) {
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-               ClearPageDirty(pages[i]);
-#else
-               cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
-#endif
+               clear_page_dirty_for_io(pages[i]);
                set_page_extent_mapped(pages[i]);
                WARN_ON(!PageLocked(pages[i]));
        }
index 5e62774..224da28 100644 (file)
@@ -268,6 +268,12 @@ again:
                }
                set_page_extent_mapped(page);
 
+               /*
+                * this makes sure page_mkwrite is called on the
+                * page if it is dirtied again later
+                */
+               clear_page_dirty_for_io(page);
+
                set_extent_delalloc(io_tree, page_start,
                                    page_end, GFP_NOFS);