Btrfs: Data ordered fixes
author Chris Mason <chris.mason@oracle.com>
Mon, 21 Jul 2008 14:29:44 +0000 (10:29 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:04:05 +0000 (11:04 -0400)
* In btrfs_delete_inode, wait for ordered extents after calling
truncate_inode_pages.  This is much faster and more correct.

* Properly clear out the PageChecked bit everywhere we redirty the page.

* Change the writepage fixup handler to lock the page range and check to
see if an ordered extent has been inserted since the improperly dirtied
page was discovered.

* Wait for ordered extents outside the transaction.  This isn't required
for locking rules but does improve transaction latencies.

* Reduce contention on the alloc_mutex by dropping it while incrementing
refs on a node/leaf and while dropping refs on a leaf.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ordered-data.c

index be2aef1..ccd4932 100644 (file)
@@ -934,7 +934,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        if (!root->ref_cows)
                return 0;
 
-       mutex_lock(&root->fs_info->alloc_mutex);
        level = btrfs_header_level(buf);
        nritems = btrfs_header_nritems(buf);
        for (i = 0; i < nritems; i++) {
@@ -951,29 +950,36 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
                        if (disk_bytenr == 0)
                                continue;
+
+                       mutex_lock(&root->fs_info->alloc_mutex);
                        ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
                                    btrfs_file_extent_disk_num_bytes(buf, fi),
                                    root->root_key.objectid, trans->transid,
                                    key.objectid, key.offset);
+                       mutex_unlock(&root->fs_info->alloc_mutex);
                        if (ret) {
                                faili = i;
+                               WARN_ON(1);
                                goto fail;
                        }
                } else {
                        bytenr = btrfs_node_blockptr(buf, i);
                        btrfs_node_key_to_cpu(buf, &key, i);
+
+                       mutex_lock(&root->fs_info->alloc_mutex);
                        ret = __btrfs_inc_extent_ref(trans, root, bytenr,
                                           btrfs_level_size(root, level - 1),
                                           root->root_key.objectid,
                                           trans->transid,
                                           level - 1, key.objectid);
+                       mutex_unlock(&root->fs_info->alloc_mutex);
                        if (ret) {
                                faili = i;
+                               WARN_ON(1);
                                goto fail;
                        }
                }
        }
-       mutex_unlock(&root->fs_info->alloc_mutex);
        return 0;
 fail:
        WARN_ON(1);
@@ -1004,7 +1010,6 @@ fail:
                }
        }
 #endif
-       mutex_unlock(&root->fs_info->alloc_mutex);
        return ret;
 }
 
@@ -2180,6 +2185,8 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
        leaf_owner = btrfs_header_owner(leaf);
        leaf_generation = btrfs_header_generation(leaf);
 
+       mutex_unlock(&root->fs_info->alloc_mutex);
+
        for (i = 0; i < nritems; i++) {
                u64 disk_bytenr;
 
@@ -2197,12 +2204,17 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
                disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                if (disk_bytenr == 0)
                        continue;
+
+               mutex_lock(&root->fs_info->alloc_mutex);
                ret = __btrfs_free_extent(trans, root, disk_bytenr,
                                btrfs_file_extent_disk_num_bytes(leaf, fi),
                                leaf_owner, leaf_generation,
                                key.objectid, key.offset, 0);
+               mutex_unlock(&root->fs_info->alloc_mutex);
                BUG_ON(ret);
        }
+
+       mutex_lock(&root->fs_info->alloc_mutex);
        return 0;
 }
 
index eccdb95..591a302 100644 (file)
@@ -75,6 +75,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
        for (i = 0; i < num_pages; i++) {
                if (!pages[i])
                        break;
+               ClearPageChecked(pages[i]);
                unlock_page(pages[i]);
                mark_page_accessed(pages[i]);
                page_cache_release(pages[i]);
index 50ee4be..8fb6dc2 100644 (file)
@@ -418,7 +418,7 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 
        fixup = container_of(work, struct btrfs_writepage_fixup, work);
        page = fixup->page;
-
+again:
        lock_page(page);
        if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
                ClearPageChecked(page);
@@ -430,9 +430,21 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work)
        page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
 
        lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
-       ordered = btrfs_lookup_ordered_extent(inode, page_start);
-       if (ordered)
+
+       /* already ordered? We're done */
+       if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+                            EXTENT_ORDERED, 0)) {
                goto out;
+       }
+
+       ordered = btrfs_lookup_ordered_extent(inode, page_start);
+       if (ordered) {
+               unlock_extent(&BTRFS_I(inode)->io_tree, page_start,
+                             page_end, GFP_NOFS);
+               unlock_page(page);
+               btrfs_start_ordered_extent(inode, ordered, 1);
+               goto again;
+       }
 
        set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end,
                            GFP_NOFS);
@@ -1465,11 +1477,11 @@ void btrfs_delete_inode(struct inode *inode)
        unsigned long nr;
        int ret;
 
-       btrfs_wait_ordered_range(inode, 0, (u64)-1);
        truncate_inode_pages(&inode->i_data, 0);
        if (is_bad_inode(inode)) {
                goto no_delete;
        }
+       btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
        btrfs_i_size_write(inode, 0);
        trans = btrfs_start_transaction(root, 1);
@@ -2707,6 +2719,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
                 1, 1, GFP_NOFS);
        __btrfs_releasepage(page, GFP_NOFS);
 
+       ClearPageChecked(page);
        if (PagePrivate(page)) {
                invalidate_extent_lru(tree, page_offset(page),
                                      PAGE_CACHE_SIZE);
@@ -2818,10 +2831,10 @@ static void btrfs_truncate(struct inode *inode)
                return;
 
        btrfs_truncate_page(inode->i_mapping, inode->i_size);
+       btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
 
        trans = btrfs_start_transaction(root, 1);
        btrfs_set_trans_block_group(trans, inode);
-       btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
        btrfs_i_size_write(inode, inode->i_size);
 
        /* FIXME, add redo link to tree so we don't leak on crash */
index c2b4a9c..0d87795 100644 (file)
@@ -336,7 +336,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
                orig_end = start + len - 1;
                wait_end = orig_end;
        }
-
+again:
        /* start IO across the range first to instantiate any delalloc
         * extents
         */
@@ -369,6 +369,14 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
                        break;
                end--;
        }
+       if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
+                          EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
+               printk("inode %lu still ordered or delalloc after wait "
+                      "%llu %llu\n", inode->i_ino,
+                      (unsigned long long)start,
+                      (unsigned long long)orig_end);
+               goto again;
+       }
 }
 
 /*
@@ -545,7 +553,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
                        sector_sums = &ordered_sum->sums;
                        for (i = 0; i < num_sectors; i++) {
                                if (sector_sums[i].offset == offset) {
-printk("find ordered sum inode %lu offset %Lu\n", inode->i_ino, offset);
                                        *sum = sector_sums[i].sum;
                                        ret = 0;
                                        goto out;