Btrfs: switch extent_map to a rw lock
author Chris Mason <chris.mason@oracle.com>
Wed, 2 Sep 2009 20:24:52 +0000 (16:24 -0400)
committer Chris Mason <chris.mason@oracle.com>
Fri, 11 Sep 2009 17:31:05 +0000 (13:31 -0400)
There are two main users of the extent_map tree.  The
first is regular file inodes, where it is evenly spread
between readers and writers.

The second is the chunk allocation tree, which maps blocks from
logical addresses to physical ones, and it is 99.99% reads.

The mapping tree is a point of lock contention during heavy IO
workloads, so this commit switches things to a rw lock.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/compression.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_map.c
fs/btrfs/extent_map.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/relocation.c
fs/btrfs/volumes.c

index de1e2fd..78451a5 100644 (file)
@@ -507,10 +507,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
                 */
                set_page_extent_mapped(page);
                lock_extent(tree, last_offset, end, GFP_NOFS);
-               spin_lock(&em_tree->lock);
+               read_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, last_offset,
                                           PAGE_CACHE_SIZE);
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
 
                if (!em || last_offset < em->start ||
                    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
@@ -594,11 +594,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        em_tree = &BTRFS_I(inode)->extent_tree;
 
        /* we need the actual starting offset of this extent in the file */
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree,
                                   page_offset(bio->bi_io_vec->bv_page),
                                   PAGE_CACHE_SIZE);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        compressed_len = em->block_len;
        cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
index 20cefc6..b6cfdd9 100644 (file)
@@ -121,15 +121,15 @@ static struct extent_map *btree_get_extent(struct inode *inode,
        struct extent_map *em;
        int ret;
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em) {
                em->bdev =
                        BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
                goto out;
        }
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        em = alloc_extent_map(GFP_NOFS);
        if (!em) {
@@ -142,7 +142,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
        em->block_start = 0;
        em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        if (ret == -EEXIST) {
                u64 failed_start = em->start;
@@ -161,7 +161,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
                free_extent_map(em);
                em = NULL;
        }
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
 
        if (ret)
                em = ERR_PTR(ret);
@@ -1323,9 +1323,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
        offset = page_offset(page);
 
        em_tree = &BTRFS_I(inode)->extent_tree;
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
        if (!em) {
                __unplug_io_fn(bdi, page);
                return;
index 72a2b9c..edd86ae 100644 (file)
@@ -5396,9 +5396,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
        lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
        while (1) {
                int ret;
-               spin_lock(&em_tree->lock);
+               write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em);
-               spin_unlock(&em_tree->lock);
+               write_unlock(&em_tree->lock);
                if (ret != -EEXIST) {
                        free_extent_map(em);
                        break;
index 8d7a152..41cf1b4 100644 (file)
@@ -2786,15 +2786,15 @@ int try_release_extent_mapping(struct extent_map_tree *map,
                u64 len;
                while (start <= end) {
                        len = end - start + 1;
-                       spin_lock(&map->lock);
+                       write_lock(&map->lock);
                        em = lookup_extent_mapping(map, start, len);
                        if (!em || IS_ERR(em)) {
-                               spin_unlock(&map->lock);
+                               write_unlock(&map->lock);
                                break;
                        }
                        if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
                            em->start != start) {
-                               spin_unlock(&map->lock);
+                               write_unlock(&map->lock);
                                free_extent_map(em);
                                break;
                        }
@@ -2808,7 +2808,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
                                free_extent_map(em);
                        }
                        start = extent_map_end(em);
-                       spin_unlock(&map->lock);
+                       write_unlock(&map->lock);
 
                        /* once for us */
                        free_extent_map(em);
index 30c9365..72e9fa3 100644 (file)
@@ -36,7 +36,7 @@ void extent_map_exit(void)
 void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
 {
        tree->map.rb_node = NULL;
-       spin_lock_init(&tree->lock);
+       rwlock_init(&tree->lock);
 }
 
 /**
@@ -222,7 +222,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
                ret = -EEXIST;
                goto out;
        }
-       assert_spin_locked(&tree->lock);
        rb = tree_insert(&tree->map, em->start, &em->rb_node);
        if (rb) {
                ret = -EEXIST;
@@ -285,7 +284,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
        struct rb_node *next = NULL;
        u64 end = range_end(start, len);
 
-       assert_spin_locked(&tree->lock);
        rb_node = __tree_search(&tree->map, start, &prev, &next);
        if (!rb_node && prev) {
                em = rb_entry(prev, struct extent_map, rb_node);
@@ -331,7 +329,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
        int ret = 0;
 
        WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
-       assert_spin_locked(&tree->lock);
        rb_erase(&em->rb_node, &tree->map);
        em->in_tree = 0;
        return ret;
index fb6eeef..6216dfb 100644 (file)
@@ -31,7 +31,7 @@ struct extent_map {
 
 struct extent_map_tree {
        struct rb_root map;
-       spinlock_t lock;
+       rwlock_t lock;
 };
 
 static inline u64 extent_map_end(struct extent_map *em)
index a760d97..8a9c76a 100644 (file)
@@ -188,15 +188,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                if (!split2)
                        split2 = alloc_extent_map(GFP_NOFS);
 
-               spin_lock(&em_tree->lock);
+               write_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, len);
                if (!em) {
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        break;
                }
                flags = em->flags;
                if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        if (em->start <= start &&
                            (!testend || em->start + em->len >= start + len)) {
                                free_extent_map(em);
@@ -259,7 +259,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                        free_extent_map(split);
                        split = NULL;
                }
-               spin_unlock(&em_tree->lock);
+               write_unlock(&em_tree->lock);
 
                /* once for us */
                free_extent_map(em);
index 04b53b5..f1df117 100644 (file)
@@ -612,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode,
                set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 
                while (1) {
-                       spin_lock(&em_tree->lock);
+                       write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                                break;
@@ -748,9 +748,9 @@ static noinline int cow_file_range(struct inode *inode,
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
 
                while (1) {
-                       spin_lock(&em_tree->lock);
+                       write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                                break;
@@ -1081,9 +1081,9 @@ out_check:
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
                        while (1) {
-                               spin_lock(&em_tree->lock);
+                               write_lock(&em_tree->lock);
                                ret = add_extent_mapping(em_tree, em);
-                               spin_unlock(&em_tree->lock);
+                               write_unlock(&em_tree->lock);
                                if (ret != -EEXIST) {
                                        free_extent_map(em);
                                        break;
@@ -1670,13 +1670,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
                failrec->last_mirror = 0;
                failrec->bio_flags = 0;
 
-               spin_lock(&em_tree->lock);
+               read_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, failrec->len);
                if (em->start > start || em->start + em->len < start) {
                        free_extent_map(em);
                        em = NULL;
                }
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
 
                if (!em || IS_ERR(em)) {
                        kfree(failrec);
@@ -4069,11 +4069,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
        int compressed;
 
 again:
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em)
                em->bdev = root->fs_info->fs_devices->latest_bdev;
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        if (em) {
                if (em->start > start || em->start + em->len <= start)
@@ -4264,7 +4264,7 @@ insert:
        }
 
        err = 0;
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        /* it is possible that someone inserted the extent into the tree
         * while we had the lock dropped.  It is also possible that
@@ -4304,7 +4304,7 @@ insert:
                        err = 0;
                }
        }
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
 out:
        if (path)
                btrfs_free_path(path);
index c04f7f2..4adab90 100644 (file)
@@ -2646,9 +2646,9 @@ int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
        lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
        while (1) {
                int ret;
-               spin_lock(&em_tree->lock);
+               write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em);
-               spin_unlock(&em_tree->lock);
+               write_unlock(&em_tree->lock);
                if (ret != -EEXIST) {
                        free_extent_map(em);
                        break;
index a7e5377..d2358c0 100644 (file)
@@ -1749,9 +1749,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
         * step two, delete the device extents and the
         * chunk tree entries
         */
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, chunk_offset, 1);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        BUG_ON(em->start > chunk_offset ||
               em->start + em->len < chunk_offset);
@@ -1780,9 +1780,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
        ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
        BUG_ON(ret);
 
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        remove_extent_mapping(em_tree, em);
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
 
        kfree(map);
        em->bdev = NULL;
@@ -2294,9 +2294,9 @@ again:
        em->block_len = em->len;
 
        em_tree = &extent_root->fs_info->mapping_tree.map_tree;
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
        BUG_ON(ret);
        free_extent_map(em);
 
@@ -2491,9 +2491,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
        int readonly = 0;
        int i;
 
-       spin_lock(&map_tree->map_tree.lock);
+       read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
-       spin_unlock(&map_tree->map_tree.lock);
+       read_unlock(&map_tree->map_tree.lock);
        if (!em)
                return 1;
 
@@ -2518,11 +2518,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
        struct extent_map *em;
 
        while (1) {
-               spin_lock(&tree->map_tree.lock);
+               write_lock(&tree->map_tree.lock);
                em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
                if (em)
                        remove_extent_mapping(&tree->map_tree, em);
-               spin_unlock(&tree->map_tree.lock);
+               write_unlock(&tree->map_tree.lock);
                if (!em)
                        break;
                kfree(em->bdev);
@@ -2540,9 +2540,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
        struct extent_map_tree *em_tree = &map_tree->map_tree;
        int ret;
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, len);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
        BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
@@ -2604,9 +2604,9 @@ again:
                atomic_set(&multi->error, 0);
        }
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, *length);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        if (!em && unplug_page)
                return 0;
@@ -2763,9 +2763,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        u64 stripe_nr;
        int i, j, nr = 0;
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, chunk_start, 1);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        BUG_ON(!em || em->start != chunk_start);
        map = (struct map_lookup *)em->bdev;
@@ -3053,9 +3053,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        logical = key->offset;
        length = btrfs_chunk_length(leaf, chunk);
 
-       spin_lock(&map_tree->map_tree.lock);
+       read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
-       spin_unlock(&map_tree->map_tree.lock);
+       read_unlock(&map_tree->map_tree.lock);
 
        /* already mapped? */
        if (em && em->start <= logical && em->start + em->len > logical) {
@@ -3114,9 +3114,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                map->stripes[i].dev->in_fs_metadata = 1;
        }
 
-       spin_lock(&map_tree->map_tree.lock);
+       write_lock(&map_tree->map_tree.lock);
        ret = add_extent_mapping(&map_tree->map_tree, em);
-       spin_unlock(&map_tree->map_tree.lock);
+       write_unlock(&map_tree->map_tree.lock);
        BUG_ON(ret);
        free_extent_map(em);