Btrfs: fix arguments to btrfs_wait_on_page_writeback_range
[safe/jmp/linux-2.6] / fs / btrfs / disk-io.c
index ec2c915..69dce50 100644 (file)
@@ -41,6 +41,9 @@
 
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
+static void free_fs_root(struct btrfs_root *root);
+
+static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
 
 /*
  * end_io_wq structs are used to do processing in task context when an IO is
@@ -121,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode,
        struct extent_map *em;
        int ret;
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em) {
                em->bdev =
                        BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
                goto out;
        }
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        em = alloc_extent_map(GFP_NOFS);
        if (!em) {
@@ -142,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
        em->block_start = 0;
        em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        if (ret == -EEXIST) {
                u64 failed_start = em->start;
@@ -161,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
                free_extent_map(em);
                em = NULL;
        }
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
 
        if (ret)
                em = ERR_PTR(ret);
@@ -826,7 +829,9 @@ int btrfs_write_tree_block(struct extent_buffer *buf)
 int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
 {
        return btrfs_wait_on_page_writeback_range(buf->first_page->mapping,
-                                 buf->start, buf->start + buf->len - 1);
+                                 buf->start >> PAGE_CACHE_SHIFT,
+                                 (buf->start + buf->len - 1) >>
+                                  PAGE_CACHE_SHIFT);
 }
 
 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
@@ -893,8 +898,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->fs_info = fs_info;
        root->objectid = objectid;
        root->last_trans = 0;
-       root->highest_inode = 0;
-       root->last_inode_alloc = 0;
+       root->highest_objectid = 0;
        root->name = NULL;
        root->in_sysfs = 0;
        root->inode_tree.rb_node = NULL;
@@ -907,7 +911,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        spin_lock_init(&root->inode_lock);
        mutex_init(&root->objectid_mutex);
        mutex_init(&root->log_mutex);
-       init_rwsem(&root->commit_root_sem);
        init_waitqueue_head(&root->log_writer_wait);
        init_waitqueue_head(&root->log_commit_wait[0]);
        init_waitqueue_head(&root->log_commit_wait[1]);
@@ -951,14 +954,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
                     root, fs_info, objectid);
        ret = btrfs_find_last_root(tree_root, objectid,
                                   &root->root_item, &root->root_key);
+       if (ret > 0)
+               return -ENOENT;
        BUG_ON(ret);
 
        generation = btrfs_root_generation(&root->root_item);
        blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     blocksize, generation);
-       root->commit_root = btrfs_root_node(root);
        BUG_ON(!root->node);
+       root->commit_root = btrfs_root_node(root);
        return 0;
 }
 
@@ -1094,7 +1099,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
        struct btrfs_fs_info *fs_info = tree_root->fs_info;
        struct btrfs_path *path;
        struct extent_buffer *l;
-       u64 highest_inode;
        u64 generation;
        u32 blocksize;
        int ret = 0;
@@ -1109,7 +1113,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
                        kfree(root);
                        return ERR_PTR(ret);
                }
-               goto insert;
+               goto out;
        }
 
        __setup_root(tree_root->nodesize, tree_root->leafsize,
@@ -1119,39 +1123,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
        path = btrfs_alloc_path();
        BUG_ON(!path);
        ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
-       if (ret != 0) {
-               if (ret > 0)
-                       ret = -ENOENT;
-               goto out;
+       if (ret == 0) {
+               l = path->nodes[0];
+               read_extent_buffer(l, &root->root_item,
+                               btrfs_item_ptr_offset(l, path->slots[0]),
+                               sizeof(root->root_item));
+               memcpy(&root->root_key, location, sizeof(*location));
        }
-       l = path->nodes[0];
-       read_extent_buffer(l, &root->root_item,
-              btrfs_item_ptr_offset(l, path->slots[0]),
-              sizeof(root->root_item));
-       memcpy(&root->root_key, location, sizeof(*location));
-       ret = 0;
-out:
-       btrfs_release_path(root, path);
        btrfs_free_path(path);
        if (ret) {
-               kfree(root);
+               if (ret > 0)
+                       ret = -ENOENT;
                return ERR_PTR(ret);
        }
+
        generation = btrfs_root_generation(&root->root_item);
        blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     blocksize, generation);
        root->commit_root = btrfs_root_node(root);
        BUG_ON(!root->node);
-insert:
-       if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
+out:
+       if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
                root->ref_cows = 1;
-               ret = btrfs_find_highest_inode(root, &highest_inode);
-               if (ret == 0) {
-                       root->highest_inode = highest_inode;
-                       root->last_inode_alloc = highest_inode;
-               }
-       }
+
        return root;
 }
 
@@ -1186,39 +1181,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
                return fs_info->dev_root;
        if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
                return fs_info->csum_root;
-
+again:
+       spin_lock(&fs_info->fs_roots_radix_lock);
        root = radix_tree_lookup(&fs_info->fs_roots_radix,
                                 (unsigned long)location->objectid);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
        if (root)
                return root;
 
+       ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
+       if (ret == 0)
+               ret = -ENOENT;
+       if (ret < 0)
+               return ERR_PTR(ret);
+
        root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
        if (IS_ERR(root))
                return root;
 
+       WARN_ON(btrfs_root_refs(&root->root_item) == 0);
        set_anon_super(&root->anon_super, NULL);
 
+       ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+       if (ret)
+               goto fail;
+
+       spin_lock(&fs_info->fs_roots_radix_lock);
        ret = radix_tree_insert(&fs_info->fs_roots_radix,
                                (unsigned long)root->root_key.objectid,
                                root);
+       if (ret == 0)
+               root->in_radix = 1;
+       spin_unlock(&fs_info->fs_roots_radix_lock);
+       radix_tree_preload_end();
        if (ret) {
-               free_extent_buffer(root->node);
-               kfree(root);
-               return ERR_PTR(ret);
+               if (ret == -EEXIST) {
+                       free_fs_root(root);
+                       goto again;
+               }
+               goto fail;
        }
-       if (!(fs_info->sb->s_flags & MS_RDONLY)) {
-               ret = btrfs_find_dead_roots(fs_info->tree_root,
-                                           root->root_key.objectid);
-               BUG_ON(ret);
+
+       ret = btrfs_find_dead_roots(fs_info->tree_root,
+                                   root->root_key.objectid);
+       WARN_ON(ret);
+
+       if (!(fs_info->sb->s_flags & MS_RDONLY))
                btrfs_orphan_cleanup(root);
-       }
+
        return root;
+fail:
+       free_fs_root(root);
+       return ERR_PTR(ret);
 }
 
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
                                      struct btrfs_key *location,
                                      const char *name, int namelen)
 {
+       return btrfs_read_fs_root_no_name(fs_info, location);
+#if 0
        struct btrfs_root *root;
        int ret;
 
@@ -1235,7 +1257,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
                kfree(root);
                return ERR_PTR(ret);
        }
-#if 0
+
        ret = btrfs_sysfs_add_root(root);
        if (ret) {
                free_extent_buffer(root->node);
@@ -1243,9 +1265,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
                kfree(root);
                return ERR_PTR(ret);
        }
-#endif
        root->in_sysfs = 1;
        return root;
+#endif
 }
 
 static int btrfs_congested_fn(void *congested_data, int bdi_bits)
@@ -1324,9 +1346,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
        offset = page_offset(page);
 
        em_tree = &BTRFS_I(inode)->extent_tree;
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
        if (!em) {
                __unplug_io_fn(bdi, page);
                return;
@@ -1343,12 +1365,27 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
        free_extent_map(em);
 }
 
+/*
+ * Set up and register the bdi.  If bdi_register() fails, the bdi is
+ * destroyed here, so the caller does not need to call bdi_destroy().
+ */
 static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 {
-       bdi_init(bdi);
+       int err;
+
+       bdi->capabilities = BDI_CAP_MAP_COPY;
+       err = bdi_init(bdi);
+       if (err)
+               return err;
+
+       err = bdi_register(bdi, NULL, "btrfs-%d",
+                               atomic_inc_return(&btrfs_bdi_num));
+       if (err) {
+               bdi_destroy(bdi);
+               return err;
+       }
+
        bdi->ra_pages   = default_backing_dev_info.ra_pages;
-       bdi->state              = 0;
-       bdi->capabilities       = default_backing_dev_info.capabilities;
        bdi->unplug_io_fn       = btrfs_unplug_io_fn;
        bdi->unplug_io_data     = info;
        bdi->congested_fn       = btrfs_congested_fn;
@@ -1436,9 +1473,12 @@ static int cleaner_kthread(void *arg)
                        break;
 
                vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
-               mutex_lock(&root->fs_info->cleaner_mutex);
-               btrfs_clean_old_snapshots(root);
-               mutex_unlock(&root->fs_info->cleaner_mutex);
+
+               if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
+                   mutex_trylock(&root->fs_info->cleaner_mutex)) {
+                       btrfs_clean_old_snapshots(root);
+                       mutex_unlock(&root->fs_info->cleaner_mutex);
+               }
 
                if (freezing(current)) {
                        refrigerator();
@@ -1543,15 +1583,36 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                err = -ENOMEM;
                goto fail;
        }
-       INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
+
+       ret = init_srcu_struct(&fs_info->subvol_srcu);
+       if (ret) {
+               err = ret;
+               goto fail;
+       }
+
+       ret = setup_bdi(fs_info, &fs_info->bdi);
+       if (ret) {
+               err = ret;
+               goto fail_srcu;
+       }
+
+       fs_info->btree_inode = new_inode(sb);
+       if (!fs_info->btree_inode) {
+               err = -ENOMEM;
+               goto fail_bdi;
+       }
+
+       INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->hashers);
        INIT_LIST_HEAD(&fs_info->delalloc_inodes);
        INIT_LIST_HEAD(&fs_info->ordered_operations);
+       INIT_LIST_HEAD(&fs_info->caching_block_groups);
        spin_lock_init(&fs_info->delalloc_lock);
        spin_lock_init(&fs_info->new_trans_lock);
        spin_lock_init(&fs_info->ref_cache_lock);
+       spin_lock_init(&fs_info->fs_roots_radix_lock);
 
        init_completion(&fs_info->kobj_unregister);
        fs_info->tree_root = tree_root;
@@ -1567,15 +1628,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        atomic_set(&fs_info->async_delalloc_pages, 0);
        atomic_set(&fs_info->async_submit_draining, 0);
        atomic_set(&fs_info->nr_async_bios, 0);
-       atomic_set(&fs_info->async_caching_threads, 0);
        fs_info->sb = sb;
        fs_info->max_extent = (u64)-1;
        fs_info->max_inline = 8192 * 1024;
-       setup_bdi(fs_info, &fs_info->bdi);
-       fs_info->btree_inode = new_inode(sb);
-       fs_info->btree_inode->i_ino = 1;
-       fs_info->btree_inode->i_nlink = 1;
-       fs_info->metadata_ratio = 8;
+       fs_info->metadata_ratio = 0;
 
        fs_info->thread_pool_size = min_t(unsigned long,
                                          num_online_cpus() + 2, 8);
@@ -1586,6 +1642,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        sb->s_blocksize = 4096;
        sb->s_blocksize_bits = blksize_bits(4096);
 
+       fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
+       fs_info->btree_inode->i_nlink = 1;
        /*
         * we set the i_size on the btree inode to the max possible int.
         * the real end of the address space is determined by all of
@@ -1604,27 +1662,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
        BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
 
+       BTRFS_I(fs_info->btree_inode)->root = tree_root;
+       memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
+              sizeof(struct btrfs_key));
+       BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
+       insert_inode_hash(fs_info->btree_inode);
+
        spin_lock_init(&fs_info->block_group_cache_lock);
        fs_info->block_group_cache_tree.rb_node = NULL;
 
-       extent_io_tree_init(&fs_info->pinned_extents,
+       extent_io_tree_init(&fs_info->freed_extents[0],
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
+       extent_io_tree_init(&fs_info->freed_extents[1],
+                            fs_info->btree_inode->i_mapping, GFP_NOFS);
+       fs_info->pinned_extents = &fs_info->freed_extents[0];
        fs_info->do_barriers = 1;
 
-       BTRFS_I(fs_info->btree_inode)->root = tree_root;
-       memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
-              sizeof(struct btrfs_key));
-       insert_inode_hash(fs_info->btree_inode);
 
        mutex_init(&fs_info->trans_mutex);
        mutex_init(&fs_info->ordered_operations_mutex);
        mutex_init(&fs_info->tree_log_mutex);
-       mutex_init(&fs_info->drop_mutex);
        mutex_init(&fs_info->chunk_mutex);
        mutex_init(&fs_info->transaction_kthread_mutex);
        mutex_init(&fs_info->cleaner_mutex);
        mutex_init(&fs_info->volume_mutex);
-       mutex_init(&fs_info->tree_reloc_mutex);
+       init_rwsem(&fs_info->extent_commit_sem);
+       init_rwsem(&fs_info->subvol_sem);
 
        btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
        btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -1683,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                err = -EINVAL;
                goto fail_iput;
        }
-
+printk("thread pool is %d\n", fs_info->thread_pool_size);
        /*
         * we need to start all the end_io workers up front because the
         * queue work function gets called at interrupt time, and so it
@@ -1728,20 +1791,22 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->endio_workers.idle_thresh = 4;
        fs_info->endio_meta_workers.idle_thresh = 4;
 
-       fs_info->endio_write_workers.idle_thresh = 64;
-       fs_info->endio_meta_write_workers.idle_thresh = 64;
+       fs_info->endio_write_workers.idle_thresh = 2;
+       fs_info->endio_meta_write_workers.idle_thresh = 2;
+
+       fs_info->endio_workers.atomic_worker_start = 1;
+       fs_info->endio_meta_workers.atomic_worker_start = 1;
+       fs_info->endio_write_workers.atomic_worker_start = 1;
+       fs_info->endio_meta_write_workers.atomic_worker_start = 1;
 
        btrfs_start_workers(&fs_info->workers, 1);
        btrfs_start_workers(&fs_info->submit_workers, 1);
        btrfs_start_workers(&fs_info->delalloc_workers, 1);
        btrfs_start_workers(&fs_info->fixup_workers, 1);
-       btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_meta_workers,
-                           fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_meta_write_workers,
-                           fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_write_workers,
-                           fs_info->thread_pool_size);
+       btrfs_start_workers(&fs_info->endio_workers, 1);
+       btrfs_start_workers(&fs_info->endio_meta_workers, 1);
+       btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
+       btrfs_start_workers(&fs_info->endio_write_workers, 1);
 
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1901,6 +1966,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                }
        }
 
+       ret = btrfs_find_orphan_roots(tree_root);
+       BUG_ON(ret);
+
        if (!(sb->s_flags & MS_RDONLY)) {
                ret = btrfs_recover_relocation(tree_root);
                BUG_ON(ret);
@@ -1958,8 +2026,10 @@ fail_iput:
 
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
+fail_bdi:
        bdi_destroy(&fs_info->bdi);
-
+fail_srcu:
+       cleanup_srcu_struct(&fs_info->subvol_srcu);
 fail:
        kfree(extent_root);
        kfree(tree_root);
@@ -2219,20 +2289,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
 
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
 {
-       WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+       spin_lock(&fs_info->fs_roots_radix_lock);
        radix_tree_delete(&fs_info->fs_roots_radix,
                          (unsigned long)root->root_key.objectid);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
+
+       if (btrfs_root_refs(&root->root_item) == 0)
+               synchronize_srcu(&fs_info->subvol_srcu);
+
+       free_fs_root(root);
+       return 0;
+}
+
+static void free_fs_root(struct btrfs_root *root)
+{
+       WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
        if (root->anon_super.s_dev) {
                down_write(&root->anon_super.s_umount);
                kill_anon_super(&root->anon_super);
        }
-       if (root->node)
-               free_extent_buffer(root->node);
-       if (root->commit_root)
-               free_extent_buffer(root->commit_root);
+       free_extent_buffer(root->node);
+       free_extent_buffer(root->commit_root);
        kfree(root->name);
        kfree(root);
-       return 0;
 }
 
 static int del_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2241,6 +2320,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info)
        struct btrfs_root *gang[8];
        int i;
 
+       while (!list_empty(&fs_info->dead_roots)) {
+               gang[0] = list_entry(fs_info->dead_roots.next,
+                                    struct btrfs_root, root_list);
+               list_del(&gang[0]->root_list);
+
+               if (gang[0]->in_radix) {
+                       btrfs_free_fs_root(fs_info, gang[0]);
+               } else {
+                       free_extent_buffer(gang[0]->node);
+                       free_extent_buffer(gang[0]->commit_root);
+                       kfree(gang[0]);
+               }
+       }
+
        while (1) {
                ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
                                             (void **)gang, 0,
@@ -2270,9 +2363,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
                root_objectid = gang[ret - 1]->root_key.objectid + 1;
                for (i = 0; i < ret; i++) {
                        root_objectid = gang[i]->root_key.objectid;
-                       ret = btrfs_find_dead_roots(fs_info->tree_root,
-                                                   root_objectid);
-                       BUG_ON(ret);
                        btrfs_orphan_cleanup(gang[i]);
                }
                root_objectid++;
@@ -2318,6 +2408,9 @@ int close_ctree(struct btrfs_root *root)
                        printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
        }
 
+       fs_info->closing = 2;
+       smp_mb();
+
        if (fs_info->delalloc_bytes) {
                printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
                       (unsigned long long)fs_info->delalloc_bytes);
@@ -2339,7 +2432,6 @@ int close_ctree(struct btrfs_root *root)
        free_extent_buffer(root->fs_info->csum_root->commit_root);
 
        btrfs_free_block_groups(root->fs_info);
-       btrfs_free_super_mirror_extents(root->fs_info);
 
        del_fs_roots(fs_info);
 
@@ -2358,6 +2450,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
        bdi_destroy(&fs_info->bdi);
+       cleanup_srcu_struct(&fs_info->subvol_srcu);
 
        kfree(fs_info->extent_root);
        kfree(fs_info->tree_root);