diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 26a1877..adda739 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -75,6 +75,40 @@ struct async_submit_bio {
        struct btrfs_work work;
 };
 
+/* These are used to set the lockdep class on the extent buffer locks.
+ * The class is set by the readpage_end_io_hook after the buffer has
+ * passed csum validation but before the pages are unlocked.
+ *
+ * The lockdep class is also set by btrfs_init_new_buffer on freshly
+ * allocated blocks.
+ *
+ * The class is based on the level in the tree block, which allows lockdep
+ * to know that lower nodes nest inside the locks of higher nodes.
+ *
+ * We also add a compile-time check to make sure the highest level of
+ * the tree matches our lockdep setup here.  If BTRFS_MAX_LEVEL changes,
+ * this code needs to be updated as well.
+ */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# if BTRFS_MAX_LEVEL != 8
+#  error "BTRFS_MAX_LEVEL changed: update the lockdep class names below"
+# endif
+static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
+static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
+       /* leaf */
+       "btrfs-extent-00",
+       "btrfs-extent-01",
+       "btrfs-extent-02",
+       "btrfs-extent-03",
+       "btrfs-extent-04",
+       "btrfs-extent-05",
+       "btrfs-extent-06",
+       "btrfs-extent-07",
+       /* highest possible level */
+       "btrfs-extent-08",
+};
+#endif
+
 /*
  * extents on the btree inode are pretty simple, there's one extent
  * that covers the entire device
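
The comment block added above describes the per-level lockdep class scheme. As a rough
illustration of the same technique outside of btrfs, the sketch below (hypothetical
demo_* names, not part of this patch) gives each tree level its own lock_class_key via
lockdep_set_class_and_name(), so that taking a child's lock while holding its parent's
registers with lockdep as nesting between distinct classes rather than recursion on a
single class:

#include <linux/lockdep.h>
#include <linux/mutex.h>

#define DEMO_MAX_LEVEL 8

/* one lockdep class per tree level, mirroring btrfs_eb_class[] above */
static struct lock_class_key demo_level_class[DEMO_MAX_LEVEL + 1];
static const char *demo_level_name[DEMO_MAX_LEVEL + 1] = {
        "demo-level-00", "demo-level-01", "demo-level-02",
        "demo-level-03", "demo-level-04", "demo-level-05",
        "demo-level-06", "demo-level-07", "demo-level-08",
};

struct demo_node {
        struct mutex lock;
        int level;              /* 0 == leaf, higher == closer to the root */
};

static void demo_node_init(struct demo_node *node, int level)
{
        node->level = level;
        mutex_init(&node->lock);
        /* replace the class chosen by mutex_init() with the per-level one */
        lockdep_set_class_and_name(&node->lock,
                                   &demo_level_class[level],
                                   demo_level_name[level]);
}

The patch itself applies the same call to eb->lock in btrfs_set_buffer_lockdep_class(),
added in the next hunk.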
@@ -347,6 +381,15 @@ static int check_tree_block_fsid(struct btrfs_root *root,
        return ret;
 }
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
+{
+       lockdep_set_class_and_name(&eb->lock,
+                          &btrfs_eb_class[level],
+                          btrfs_eb_name[level]);
+}
+#endif
+
 static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                               struct extent_state *state)
 {
@@ -392,6 +435,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        }
        found_level = btrfs_header_level(eb);
 
+       btrfs_set_buffer_lockdep_class(eb, found_level);
+
        ret = csum_tree_block(root, eb, 1);
        if (ret)
                ret = -EIO;
@@ -799,7 +844,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
        ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
 
        if (ret == 0)
-               buf->flags |= EXTENT_UPTODATE;
+               set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
        else
                WARN_ON(1);
        return buf;
@@ -813,6 +858,10 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        if (btrfs_header_generation(buf) ==
            root->fs_info->running_transaction->transid) {
                WARN_ON(!btrfs_tree_locked(buf));
+
+               /* ugh, clear_extent_buffer_dirty can be expensive */
+               btrfs_set_lock_blocking(buf);
+
                clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
                                          buf);
        }
@@ -849,6 +898,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        spin_lock_init(&root->list_lock);
        mutex_init(&root->objectid_mutex);
        mutex_init(&root->log_mutex);
+       init_waitqueue_head(&root->log_writer_wait);
+       init_waitqueue_head(&root->log_commit_wait[0]);
+       init_waitqueue_head(&root->log_commit_wait[1]);
+       atomic_set(&root->log_commit[0], 0);
+       atomic_set(&root->log_commit[1], 0);
+       atomic_set(&root->log_writers, 0);
+       root->log_batch = 0;
+       root->log_transid = 0;
        extent_io_tree_init(&root->dirty_log_pages,
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
 
@@ -933,15 +990,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
-                            struct btrfs_fs_info *fs_info)
+static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
+                                        struct btrfs_fs_info *fs_info)
 {
        struct btrfs_root *root;
        struct btrfs_root *tree_root = fs_info->tree_root;
+       struct extent_buffer *leaf;
 
        root = kzalloc(sizeof(*root), GFP_NOFS);
        if (!root)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        __setup_root(tree_root->nodesize, tree_root->leafsize,
                     tree_root->sectorsize, tree_root->stripesize,
@@ -950,12 +1008,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
        root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
        root->root_key.type = BTRFS_ROOT_ITEM_KEY;
        root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
+       /*
+        * log trees do not get reference counted because they go away
+        * before a real commit is actually done.  They do store pointers
+        * to file data extents, and those reference counts still get
+        * updated (along with back refs to the log tree).
+        */
        root->ref_cows = 0;
 
-       root->node = btrfs_alloc_free_block(trans, root, root->leafsize,
-                                           0, BTRFS_TREE_LOG_OBJECTID,
-                                           trans->transid, 0, 0, 0);
+       leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
+                                     0, BTRFS_TREE_LOG_OBJECTID,
+                                     trans->transid, 0, 0, 0);
+       if (IS_ERR(leaf)) {
+               kfree(root);
+               return ERR_CAST(leaf);
+       }
 
+       root->node = leaf;
        btrfs_set_header_nritems(root->node, 0);
        btrfs_set_header_level(root->node, 0);
        btrfs_set_header_bytenr(root->node, root->node->start);
@@ -967,7 +1036,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
                            BTRFS_FSID_SIZE);
        btrfs_mark_buffer_dirty(root->node);
        btrfs_tree_unlock(root->node);
-       fs_info->log_root_tree = root;
+       return root;
+}
+
+int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
+                            struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_root *log_root;
+
+       log_root = alloc_log_tree(trans, fs_info);
+       if (IS_ERR(log_root))
+               return PTR_ERR(log_root);
+       WARN_ON(fs_info->log_root_tree);
+       fs_info->log_root_tree = log_root;
+       return 0;
+}
+
+int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root)
+{
+       struct btrfs_root *log_root;
+       struct btrfs_inode_item *inode_item;
+
+       log_root = alloc_log_tree(trans, root->fs_info);
+       if (IS_ERR(log_root))
+               return PTR_ERR(log_root);
+
+       log_root->last_trans = trans->transid;
+       log_root->root_key.offset = root->root_key.objectid;
+
+       inode_item = &log_root->root_item.inode;
+       inode_item->generation = cpu_to_le64(1);
+       inode_item->size = cpu_to_le64(3);
+       inode_item->nlink = cpu_to_le32(1);
+       inode_item->nbytes = cpu_to_le64(root->leafsize);
+       inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
+
+       btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start);
+       btrfs_set_root_generation(&log_root->root_item, trans->transid);
+
+       WARN_ON(root->log_root);
+       root->log_root = log_root;
+       root->log_transid = 0;
        return 0;
 }
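
In the hunks above, alloc_log_tree() reports failure through the returned pointer rather
than a separate int, and btrfs_init_log_root_tree()/btrfs_add_log_tree() unwrap it with
IS_ERR()/PTR_ERR(). A minimal, self-contained sketch of that ERR_PTR convention, using
hypothetical demo_* names rather than the btrfs functions:

#include <linux/err.h>
#include <linux/slab.h>

struct demo_item {
        int value;
};

/* allocator: on failure, encode the errno in the returned pointer */
static struct demo_item *demo_alloc(void)
{
        struct demo_item *item = kzalloc(sizeof(*item), GFP_NOFS);

        if (!item)
                return ERR_PTR(-ENOMEM);
        return item;
}

/* caller: decode the errno with IS_ERR()/PTR_ERR() */
static int demo_create(struct demo_item **result)
{
        struct demo_item *item = demo_alloc();

        if (IS_ERR(item))
                return PTR_ERR(item);
        *result = item;
        return 0;
}

ERR_CAST() is the same idea when the error has to cross pointer types, as in
alloc_log_tree() returning the failed extent_buffer pointer as a btrfs_root error.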
 
@@ -1442,7 +1552,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->hashers);
        INIT_LIST_HEAD(&fs_info->delalloc_inodes);
-       spin_lock_init(&fs_info->hash_lock);
        spin_lock_init(&fs_info->delalloc_lock);
        spin_lock_init(&fs_info->new_trans_lock);
        spin_lock_init(&fs_info->ref_cache_lock);
@@ -1530,10 +1639,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        init_waitqueue_head(&fs_info->transaction_throttle);
        init_waitqueue_head(&fs_info->transaction_wait);
        init_waitqueue_head(&fs_info->async_submit_wait);
-       init_waitqueue_head(&fs_info->tree_log_wait);
-       atomic_set(&fs_info->tree_log_commit, 0);
-       atomic_set(&fs_info->tree_log_writers, 0);
-       fs_info->tree_log_transid = 0;
 
        __setup_root(4096, 4096, 4096, 4096, tree_root,
                     fs_info, BTRFS_ROOT_TREE_OBJECTID);
@@ -1622,6 +1727,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         * low idle thresh
         */
        fs_info->endio_workers.idle_thresh = 4;
+       fs_info->endio_meta_workers.idle_thresh = 4;
+
        fs_info->endio_write_workers.idle_thresh = 64;
        fs_info->endio_meta_write_workers.idle_thresh = 64;
 
@@ -1715,7 +1822,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        ret = find_and_setup_root(tree_root, fs_info,
                                  BTRFS_DEV_TREE_OBJECTID, dev_root);
        dev_root->track_dirty = 1;
-
        if (ret)
                goto fail_extent_root;
 
@@ -1823,13 +1929,14 @@ fail_sb_buffer:
 fail_iput:
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
        iput(fs_info->btree_inode);
-fail:
+
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
+       bdi_destroy(&fs_info->bdi);
 
+fail:
        kfree(extent_root);
        kfree(tree_root);
-       bdi_destroy(&fs_info->bdi);
        kfree(fs_info);
        kfree(chunk_root);
        kfree(dev_root);
@@ -2252,6 +2359,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        u64 transid = btrfs_header_generation(buf);
        struct inode *btree_inode = root->fs_info->btree_inode;
 
+       btrfs_set_lock_blocking(buf);
+
        WARN_ON(!btrfs_tree_locked(buf));
        if (transid != root->fs_info->generation) {
                printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
@@ -2294,14 +2403,13 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
        int ret;
        ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
        if (ret == 0)
-               buf->flags |= EXTENT_UPTODATE;
+               set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
        return ret;
 }
 
 int btree_lock_page_hook(struct page *page)
 {
        struct inode *inode = page->mapping->host;
-       struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_buffer *eb;
        unsigned long len;
@@ -2316,9 +2424,7 @@ int btree_lock_page_hook(struct page *page)
                goto out;
 
        btrfs_tree_lock(eb);
-       spin_lock(&root->fs_info->hash_lock);
        btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
-       spin_unlock(&root->fs_info->hash_lock);
        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);
 out: