X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fbtrfs%2Fdisk-io.c;h=69dce50aabd210ed669fc22246d26cfdc5856eb0;hb=35d62a942db5ae03104929fe7397835b572c4bc4;hp=ec2c915f7f4a61a6e5ca8818a96a7495d7f2e053;hpb=817d52f8dba26d0295c26035531c30ce5f1e3c3e;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec2c915..69dce50 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -41,6 +41,9 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); +static void free_fs_root(struct btrfs_root *root); + +static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); /* * end_io_wq structs are used to do processing in task context when an IO is @@ -121,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode, struct extent_map *em; int ret; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (em) { em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); goto out; } - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); em = alloc_extent_map(GFP_NOFS); if (!em) { @@ -142,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, em->block_start = 0; em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { u64 failed_start = em->start; @@ -161,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, free_extent_map(em); em = NULL; } - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret) em = ERR_PTR(ret); @@ -826,7 +829,9 @@ int btrfs_write_tree_block(struct extent_buffer *buf) int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) { return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, - buf->start, buf->start + buf->len - 1); + buf->start >> PAGE_CACHE_SHIFT, + (buf->start + buf->len - 1) >> + PAGE_CACHE_SHIFT); } struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, @@ -893,8 +898,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; - root->highest_inode = 0; - root->last_inode_alloc = 0; + root->highest_objectid = 0; root->name = NULL; root->in_sysfs = 0; root->inode_tree.rb_node = NULL; @@ -907,7 +911,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->inode_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); - init_rwsem(&root->commit_root_sem); init_waitqueue_head(&root->log_writer_wait); init_waitqueue_head(&root->log_commit_wait[0]); init_waitqueue_head(&root->log_commit_wait[1]); @@ -951,14 +954,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); + if (ret > 0) + return -ENOENT; BUG_ON(ret); generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); - root->commit_root = btrfs_root_node(root); BUG_ON(!root->node); + root->commit_root = btrfs_root_node(root); return 0; } @@ -1094,7 +1099,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_path *path; struct extent_buffer *l; - u64 highest_inode; u64 generation; u32 blocksize; int ret = 0; @@ -1109,7 +1113,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, kfree(root); return ERR_PTR(ret); } - goto insert; + goto out; } __setup_root(tree_root->nodesize, tree_root->leafsize, @@ -1119,39 +1123,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, path = btrfs_alloc_path(); BUG_ON(!path); ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); - if (ret != 0) { - if (ret > 0) - ret = -ENOENT; - goto out; + if (ret == 0) { + l = path->nodes[0]; + read_extent_buffer(l, &root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), + sizeof(root->root_item)); + memcpy(&root->root_key, location, sizeof(*location)); } - l = path->nodes[0]; - read_extent_buffer(l, &root->root_item, - btrfs_item_ptr_offset(l, path->slots[0]), - sizeof(root->root_item)); - memcpy(&root->root_key, location, sizeof(*location)); - ret = 0; -out: - btrfs_release_path(root, path); btrfs_free_path(path); if (ret) { - kfree(root); + if (ret > 0) + ret = -ENOENT; return ERR_PTR(ret); } + generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); root->commit_root = btrfs_root_node(root); BUG_ON(!root->node); -insert: - if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { +out: + if (location->objectid != BTRFS_TREE_LOG_OBJECTID) root->ref_cows = 1; - ret = btrfs_find_highest_inode(root, &highest_inode); - if (ret == 0) { - root->highest_inode = highest_inode; - root->last_inode_alloc = highest_inode; - } - } + return root; } @@ -1186,39 +1181,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->dev_root; if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) return fs_info->csum_root; - +again: + spin_lock(&fs_info->fs_roots_radix_lock); root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); + spin_unlock(&fs_info->fs_roots_radix_lock); if (root) return root; + ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); + if (ret == 0) + ret = -ENOENT; + if (ret < 0) + return ERR_PTR(ret); + root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); if (IS_ERR(root)) return root; + WARN_ON(btrfs_root_refs(&root->root_item) == 0); set_anon_super(&root->anon_super, NULL); + ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); + if (ret) + goto fail; + + spin_lock(&fs_info->fs_roots_radix_lock); ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); + if (ret == 0) + root->in_radix = 1; + spin_unlock(&fs_info->fs_roots_radix_lock); + radix_tree_preload_end(); if (ret) { - free_extent_buffer(root->node); - kfree(root); - return ERR_PTR(ret); + if (ret == -EEXIST) { + free_fs_root(root); + goto again; + } + goto fail; } - if (!(fs_info->sb->s_flags & MS_RDONLY)) { - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid); - BUG_ON(ret); + + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid); + WARN_ON(ret); + + if (!(fs_info->sb->s_flags & MS_RDONLY)) btrfs_orphan_cleanup(root); - } + return root; +fail: + free_fs_root(root); + return ERR_PTR(ret); } struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen) { + return btrfs_read_fs_root_no_name(fs_info, location); +#if 0 struct btrfs_root *root; int ret; @@ -1235,7 +1257,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } -#if 0 + ret = btrfs_sysfs_add_root(root); if (ret) { free_extent_buffer(root->node); @@ -1243,9 +1265,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } -#endif root->in_sysfs = 1; return root; +#endif } static int btrfs_congested_fn(void *congested_data, int bdi_bits) @@ -1324,9 +1346,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) offset = page_offset(page); em_tree = &BTRFS_I(inode)->extent_tree; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (!em) { __unplug_io_fn(bdi, page); return; @@ -1343,12 +1365,27 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) free_extent_map(em); } +/* + * If this fails, caller must call bdi_destroy() to get rid of the + * bdi again. + */ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { - bdi_init(bdi); + int err; + + bdi->capabilities = BDI_CAP_MAP_COPY; + err = bdi_init(bdi); + if (err) + return err; + + err = bdi_register(bdi, NULL, "btrfs-%d", + atomic_inc_return(&btrfs_bdi_num)); + if (err) { + bdi_destroy(bdi); + return err; + } + bdi->ra_pages = default_backing_dev_info.ra_pages; - bdi->state = 0; - bdi->capabilities = default_backing_dev_info.capabilities; bdi->unplug_io_fn = btrfs_unplug_io_fn; bdi->unplug_io_data = info; bdi->congested_fn = btrfs_congested_fn; @@ -1436,9 +1473,12 @@ static int cleaner_kthread(void *arg) break; vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); - mutex_lock(&root->fs_info->cleaner_mutex); - btrfs_clean_old_snapshots(root); - mutex_unlock(&root->fs_info->cleaner_mutex); + + if (!(root->fs_info->sb->s_flags & MS_RDONLY) && + mutex_trylock(&root->fs_info->cleaner_mutex)) { + btrfs_clean_old_snapshots(root); + mutex_unlock(&root->fs_info->cleaner_mutex); + } if (freezing(current)) { refrigerator(); @@ -1543,15 +1583,36 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -ENOMEM; goto fail; } - INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); + + ret = init_srcu_struct(&fs_info->subvol_srcu); + if (ret) { + err = ret; + goto fail; + } + + ret = setup_bdi(fs_info, &fs_info->bdi); + if (ret) { + err = ret; + goto fail_srcu; + } + + fs_info->btree_inode = new_inode(sb); + if (!fs_info->btree_inode) { + err = -ENOMEM; + goto fail_bdi; + } + + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->delalloc_inodes); INIT_LIST_HEAD(&fs_info->ordered_operations); + INIT_LIST_HEAD(&fs_info->caching_block_groups); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->ref_cache_lock); + spin_lock_init(&fs_info->fs_roots_radix_lock); init_completion(&fs_info->kobj_unregister); fs_info->tree_root = tree_root; @@ -1567,15 +1628,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); - atomic_set(&fs_info->async_caching_threads, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; - setup_bdi(fs_info, &fs_info->bdi); - fs_info->btree_inode = new_inode(sb); - fs_info->btree_inode->i_ino = 1; - fs_info->btree_inode->i_nlink = 1; - fs_info->metadata_ratio = 8; + fs_info->metadata_ratio = 0; fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); @@ -1586,6 +1642,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); + fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; + fs_info->btree_inode->i_nlink = 1; /* * we set the i_size on the btree inode to the max possible int. * the real end of the address space is determined by all of @@ -1604,27 +1662,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; + BTRFS_I(fs_info->btree_inode)->root = tree_root; + memset(&BTRFS_I(fs_info->btree_inode)->location, 0, + sizeof(struct btrfs_key)); + BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; + insert_inode_hash(fs_info->btree_inode); + spin_lock_init(&fs_info->block_group_cache_lock); fs_info->block_group_cache_tree.rb_node = NULL; - extent_io_tree_init(&fs_info->pinned_extents, + extent_io_tree_init(&fs_info->freed_extents[0], fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_io_tree_init(&fs_info->freed_extents[1], + fs_info->btree_inode->i_mapping, GFP_NOFS); + fs_info->pinned_extents = &fs_info->freed_extents[0]; fs_info->do_barriers = 1; - BTRFS_I(fs_info->btree_inode)->root = tree_root; - memset(&BTRFS_I(fs_info->btree_inode)->location, 0, - sizeof(struct btrfs_key)); - insert_inode_hash(fs_info->btree_inode); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); - mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); - mutex_init(&fs_info->tree_reloc_mutex); + init_rwsem(&fs_info->extent_commit_sem); + init_rwsem(&fs_info->subvol_sem); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); @@ -1683,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -EINVAL; goto fail_iput; } - +printk("thread pool is %d\n", fs_info->thread_pool_size); /* * we need to start all the end_io workers up front because the * queue work function gets called at interrupt time, and so it @@ -1728,20 +1791,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->endio_workers.idle_thresh = 4; fs_info->endio_meta_workers.idle_thresh = 4; - fs_info->endio_write_workers.idle_thresh = 64; - fs_info->endio_meta_write_workers.idle_thresh = 64; + fs_info->endio_write_workers.idle_thresh = 2; + fs_info->endio_meta_write_workers.idle_thresh = 2; + + fs_info->endio_workers.atomic_worker_start = 1; + fs_info->endio_meta_workers.atomic_worker_start = 1; + fs_info->endio_write_workers.atomic_worker_start = 1; + fs_info->endio_meta_write_workers.atomic_worker_start = 1; btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->delalloc_workers, 1); btrfs_start_workers(&fs_info->fixup_workers, 1); - btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); - btrfs_start_workers(&fs_info->endio_meta_workers, - fs_info->thread_pool_size); - btrfs_start_workers(&fs_info->endio_meta_write_workers, - fs_info->thread_pool_size); - btrfs_start_workers(&fs_info->endio_write_workers, - fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->endio_workers, 1); + btrfs_start_workers(&fs_info->endio_meta_workers, 1); + btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); + btrfs_start_workers(&fs_info->endio_write_workers, 1); fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, @@ -1901,6 +1966,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, } } + ret = btrfs_find_orphan_roots(tree_root); + BUG_ON(ret); + if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_recover_relocation(tree_root); BUG_ON(ret); @@ -1958,8 +2026,10 @@ fail_iput: btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); +fail_bdi: bdi_destroy(&fs_info->bdi); - +fail_srcu: + cleanup_srcu_struct(&fs_info->subvol_srcu); fail: kfree(extent_root); kfree(tree_root); @@ -2219,20 +2289,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans, int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { - WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); + spin_lock(&fs_info->fs_roots_radix_lock); radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); + spin_unlock(&fs_info->fs_roots_radix_lock); + + if (btrfs_root_refs(&root->root_item) == 0) + synchronize_srcu(&fs_info->subvol_srcu); + + free_fs_root(root); + return 0; +} + +static void free_fs_root(struct btrfs_root *root) +{ + WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); if (root->anon_super.s_dev) { down_write(&root->anon_super.s_umount); kill_anon_super(&root->anon_super); } - if (root->node) - free_extent_buffer(root->node); - if (root->commit_root) - free_extent_buffer(root->commit_root); + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); kfree(root->name); kfree(root); - return 0; } static int del_fs_roots(struct btrfs_fs_info *fs_info) @@ -2241,6 +2320,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) struct btrfs_root *gang[8]; int i; + while (!list_empty(&fs_info->dead_roots)) { + gang[0] = list_entry(fs_info->dead_roots.next, + struct btrfs_root, root_list); + list_del(&gang[0]->root_list); + + if (gang[0]->in_radix) { + btrfs_free_fs_root(fs_info, gang[0]); + } else { + free_extent_buffer(gang[0]->node); + free_extent_buffer(gang[0]->commit_root); + kfree(gang[0]); + } + } + while (1) { ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, 0, @@ -2270,9 +2363,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) root_objectid = gang[ret - 1]->root_key.objectid + 1; for (i = 0; i < ret; i++) { root_objectid = gang[i]->root_key.objectid; - ret = btrfs_find_dead_roots(fs_info->tree_root, - root_objectid); - BUG_ON(ret); btrfs_orphan_cleanup(gang[i]); } root_objectid++; @@ -2318,6 +2408,9 @@ int close_ctree(struct btrfs_root *root) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } + fs_info->closing = 2; + smp_mb(); + if (fs_info->delalloc_bytes) { printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", (unsigned long long)fs_info->delalloc_bytes); @@ -2339,7 +2432,6 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->fs_info->csum_root->commit_root); btrfs_free_block_groups(root->fs_info); - btrfs_free_super_mirror_extents(root->fs_info); del_fs_roots(fs_info); @@ -2358,6 +2450,7 @@ int close_ctree(struct btrfs_root *root) btrfs_mapping_tree_free(&fs_info->mapping_tree); bdi_destroy(&fs_info->bdi); + cleanup_srcu_struct(&fs_info->subvol_srcu); kfree(fs_info->extent_root); kfree(fs_info->tree_root);