Btrfs: Seed device support
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7503bd4..54d013c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -30,7 +30,7 @@ static struct kmem_cache *extent_buffer_cache;
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
 
-#define LEAK_DEBUG 1
+#define LEAK_DEBUG 0
 #ifdef LEAK_DEBUG
 static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED;
 #endif
@@ -47,6 +47,11 @@ struct extent_page_data {
        struct bio *bio;
        struct extent_io_tree *tree;
        get_extent_t *get_extent;
+
+       /* tells writepage not to lock the state bits for this range
+        * it still does the unlocking
+        */
+       int extent_locked;
 };
 
 int __init extent_io_init(void)
@@ -946,8 +951,12 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
 
        err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
                             &failed_start, mask);
-       if (err == -EEXIST)
+       if (err == -EEXIST) {
+               if (failed_start > start)
+                       clear_extent_bit(tree, start, failed_start - 1,
+                                        EXTENT_LOCKED, 1, 0, mask);
                return 0;
+       }
        return 1;
 }
 EXPORT_SYMBOL(try_lock_extent);
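
A minimal caller sketch (not part of the patch; names hypothetical) to make the contract above concrete: try_lock_extent() returns 1 when the whole [start, end] range is now locked and 0 when another task already holds part of it. With the clear_extent_bit() call added above, a failed attempt no longer leaves the bytes before failed_start locked behind the caller's back, so the caller can simply back off:

	if (!try_lock_extent(tree, start, end, GFP_NOFS))
		return -EAGAIN;	/* nothing in [start, end] stays locked by us */
	/* ... work on the range ... */
	unlock_extent(tree, start, end, GFP_NOFS);
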
@@ -1148,7 +1157,8 @@ static noinline int __unlock_for_delalloc(struct inode *inode,
 
        while(nr_pages > 0) {
                ret = find_get_pages_contig(inode->i_mapping, index,
-                                    min(nr_pages, ARRAY_SIZE(pages)), pages);
+                                    min_t(unsigned long, nr_pages,
+                                    ARRAY_SIZE(pages)), pages);
                for (i = 0; i < ret; i++) {
                        if (pages[i] != locked_page)
                                unlock_page(pages[i]);
@@ -1183,7 +1193,8 @@ static noinline int lock_delalloc_pages(struct inode *inode,
        nrpages = end_index - index + 1;
        while(nrpages > 0) {
                ret = find_get_pages_contig(inode->i_mapping, index,
-                                    min(nrpages, ARRAY_SIZE(pages)), pages);
+                                    min_t(unsigned long,
+                                    nrpages, ARRAY_SIZE(pages)), pages);
                if (ret == 0) {
                        ret = -EAGAIN;
                        goto done;
@@ -1194,11 +1205,19 @@ static noinline int lock_delalloc_pages(struct inode *inode,
                         * the caller is taking responsibility for
                         * locked_page
                         */
-                       if (pages[i] != locked_page)
+                       if (pages[i] != locked_page) {
                                lock_page(pages[i]);
+                               if (!PageDirty(pages[i]) ||
+                                   pages[i]->mapping != inode->i_mapping) {
+                                       ret = -EAGAIN;
+                                       unlock_page(pages[i]);
+                                       page_cache_release(pages[i]);
+                                       goto done;
+                               }
+                       }
                        page_cache_release(pages[i]);
+                       pages_locked++;
                }
-               pages_locked += ret;
                nrpages -= ret;
                index += ret;
                cond_resched();
@@ -1238,19 +1257,27 @@ again:
        delalloc_end = 0;
        found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
                                    max_bytes);
-       if (!found) {
+       if (!found || delalloc_end <= *start) {
                *start = delalloc_start;
                *end = delalloc_end;
                return found;
        }
 
        /*
+        * start comes from the offset of locked_page.  We have to lock
+        * pages in order, so we can't process delalloc bytes before
+        * locked_page
+        */
+       if (delalloc_start < *start) {
+               delalloc_start = *start;
+       }
+
+       /*
         * make sure to limit the number of pages we try to lock down
         * if we're looping.
         */
        if (delalloc_end + 1 - delalloc_start > max_bytes && loops) {
-               delalloc_end = (delalloc_start + PAGE_CACHE_SIZE - 1) &
-                       ~((u64)PAGE_CACHE_SIZE - 1);
+               delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
        }
        /* step two, lock all the pages after the page that has start */
        ret = lock_delalloc_pages(inode, locked_page,
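
A worked example of the clamping added above, with hypothetical numbers (4K pages): start comes from locked_page, and pages must be locked in file order, so delalloc bytes before locked_page are skipped rather than locked out of order:

	u64 start = 40960;		/* offset of locked_page (page index 10)  */
	u64 delalloc_start = 0;		/* what find_delalloc_range() reported    */
	u64 delalloc_end = 262143;

	if (delalloc_start < start)
		delalloc_start = start;	/* locking now begins at 40960, not 0     */
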
@@ -1293,7 +1320,10 @@ out_failed:
 int extent_clear_unlock_delalloc(struct inode *inode,
                                struct extent_io_tree *tree,
                                u64 start, u64 end, struct page *locked_page,
-                               int clear_dirty, int set_writeback,
+                               int unlock_pages,
+                               int clear_unlock,
+                               int clear_delalloc, int clear_dirty,
+                               int set_writeback,
                                int end_writeback)
 {
        int ret;
@@ -1302,16 +1332,24 @@ int extent_clear_unlock_delalloc(struct inode *inode,
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
        unsigned long nr_pages = end_index - index + 1;
        int i;
-       int clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC;
+       int clear_bits = 0;
 
+       if (clear_unlock)
+               clear_bits |= EXTENT_LOCKED;
        if (clear_dirty)
                clear_bits |= EXTENT_DIRTY;
 
+       if (clear_delalloc)
+               clear_bits |= EXTENT_DELALLOC;
+
        clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
+       if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
+               return 0;
 
        while(nr_pages > 0) {
                ret = find_get_pages_contig(inode->i_mapping, index,
-                                    min(nr_pages, ARRAY_SIZE(pages)), pages);
+                                    min_t(unsigned long,
+                                    nr_pages, ARRAY_SIZE(pages)), pages);
                for (i = 0; i < ret; i++) {
                        if (pages[i] == locked_page) {
                                page_cache_release(pages[i]);
@@ -1323,7 +1361,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
                                set_page_writeback(pages[i]);
                        if (end_writeback)
                                end_page_writeback(pages[i]);
-                       unlock_page(pages[i]);
+                       if (unlock_pages)
+                               unlock_page(pages[i]);
                        page_cache_release(pages[i]);
                }
                nr_pages -= ret;
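
A hypothetical call, only to show how the expanded flag list maps onto the old form: the previous version always cleared EXTENT_LOCKED and EXTENT_DELALLOC and always unlocked the pages, so an old clear_dirty=1 call corresponds roughly to:

	extent_clear_unlock_delalloc(inode, tree, start, end, locked_page,
				     1,		/* unlock_pages   */
				     1,		/* clear_unlock   */
				     1,		/* clear_delalloc */
				     1,		/* clear_dirty    */
				     0,		/* set_writeback  */
				     0);	/* end_writeback  */
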
@@ -1728,9 +1767,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                        }
                }
 
-               if (uptodate)
+               if (uptodate) {
                        set_extent_uptodate(tree, start, end,
                                            GFP_ATOMIC);
+               }
                unlock_extent(tree, start, end, GFP_ATOMIC);
 
                if (whole_page) {
@@ -1859,7 +1899,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
        int contig = 0;
        int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
        int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
-       size_t page_size = min(size, PAGE_CACHE_SIZE);
+       size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
 
        if (bio_ret && *bio_ret) {
                bio = *bio_ret;
@@ -1912,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
                set_page_private(page, EXTENT_PAGE_PRIVATE);
        }
 }
+EXPORT_SYMBOL(set_page_extent_mapped);
 
 void set_page_extent_head(struct page *page, unsigned long len)
 {
@@ -2011,6 +2052,8 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur);
                }
                bdev = em->bdev;
                block_start = em->block_start;
+               if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+                       block_start = EXTENT_MAP_HOLE;
                free_extent_map(em);
                em = NULL;
 
@@ -2128,6 +2171,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        u64 delalloc_end;
        int page_started;
        int compressed;
+       unsigned long nr_written = 0;
 
        WARN_ON(!PageLocked(page));
        pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
@@ -2154,27 +2198,33 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        delalloc_start = start;
        delalloc_end = 0;
        page_started = 0;
-       while(delalloc_end < page_end) {
-               nr_delalloc = find_lock_delalloc_range(inode, tree,
+       if (!epd->extent_locked) {
+               while(delalloc_end < page_end) {
+                       nr_delalloc = find_lock_delalloc_range(inode, tree,
                                                       page,
                                                       &delalloc_start,
                                                       &delalloc_end,
                                                       128 * 1024 * 1024);
-               if (nr_delalloc == 0) {
+                       if (nr_delalloc == 0) {
+                               delalloc_start = delalloc_end + 1;
+                               continue;
+                       }
+                       tree->ops->fill_delalloc(inode, page, delalloc_start,
+                                                delalloc_end, &page_started,
+                                                &nr_written);
                        delalloc_start = delalloc_end + 1;
-                       continue;
                }
-               tree->ops->fill_delalloc(inode, page, delalloc_start,
-                                        delalloc_end, &page_started);
-               delalloc_start = delalloc_end + 1;
-       }
 
-       /* did the fill delalloc function already unlock and start the IO? */
-       if (page_started) {
-               return 0;
+               /* did the fill delalloc function already unlock and start
+                * the IO?
+                */
+               if (page_started) {
+                       ret = 0;
+                       goto update_nr_written;
+               }
        }
-
        lock_extent(tree, start, page_end, GFP_NOFS);
+
        unlock_start = start;
 
        if (tree->ops && tree->ops->writepage_start_hook) {
@@ -2184,10 +2234,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        unlock_extent(tree, start, page_end, GFP_NOFS);
                        redirty_page_for_writepage(wbc, page);
                        unlock_page(page);
-                       return 0;
+                       ret = 0;
+                       goto update_nr_written;
                }
        }
 
+       nr_written++;
+
        end = page_end;
        if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
                printk("found delalloc bits after lock_extent\n");
@@ -2318,6 +2371,12 @@ done:
        if (unlock_start <= page_end)
                unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
        unlock_page(page);
+
+update_nr_written:
+       wbc->nr_to_write -= nr_written;
+       if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+           wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+               page->mapping->writeback_index = page->index + nr_written;
        return 0;
 }
 
@@ -2416,7 +2475,7 @@ retry:
                                unlock_page(page);
                                ret = 0;
                        }
-                       if (ret || (--(wbc->nr_to_write) <= 0))
+                       if (ret || wbc->nr_to_write <= 0)
                                done = 1;
                        if (wbc->nonblocking && bdi_write_congested(bdi)) {
                                wbc->encountered_congestion = 1;
@@ -2435,11 +2494,6 @@ retry:
                index = 0;
                goto retry;
        }
-       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-               mapping->writeback_index = index;
-
-       if (wbc->range_cont)
-               wbc->range_start = index << PAGE_CACHE_SHIFT;
        return ret;
 }
 EXPORT_SYMBOL(extent_write_cache_pages);
@@ -2454,6 +2508,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
                .bio = NULL,
                .tree = tree,
                .get_extent = get_extent,
+               .extent_locked = 0,
        };
        struct writeback_control wbc_writepages = {
                .bdi            = wbc->bdi,
@@ -2476,6 +2531,52 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 }
 EXPORT_SYMBOL(extent_write_full_page);
 
+int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
+                             u64 start, u64 end, get_extent_t *get_extent,
+                             int mode)
+{
+       int ret = 0;
+       struct address_space *mapping = inode->i_mapping;
+       struct page *page;
+       unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
+               PAGE_CACHE_SHIFT;
+
+       struct extent_page_data epd = {
+               .bio = NULL,
+               .tree = tree,
+               .get_extent = get_extent,
+               .extent_locked = 1,
+       };
+       struct writeback_control wbc_writepages = {
+               .bdi            = inode->i_mapping->backing_dev_info,
+               .sync_mode      = mode,
+               .older_than_this = NULL,
+               .nr_to_write    = nr_pages * 2,
+               .range_start    = start,
+               .range_end      = end + 1,
+       };
+
+       while(start <= end) {
+               page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+               if (clear_page_dirty_for_io(page))
+                       ret = __extent_writepage(page, &wbc_writepages, &epd);
+               else {
+                       if (tree->ops && tree->ops->writepage_end_io_hook)
+                               tree->ops->writepage_end_io_hook(page, start,
+                                                start + PAGE_CACHE_SIZE - 1,
+                                                NULL, 1);
+                       unlock_page(page);
+               }
+               page_cache_release(page);
+               start += PAGE_CACHE_SIZE;
+       }
+
+       if (epd.bio)
+               submit_one_bio(WRITE, epd.bio, 0, 0);
+       return ret;
+}
+EXPORT_SYMBOL(extent_write_locked_range);
+
 
 int extent_writepages(struct extent_io_tree *tree,
                      struct address_space *mapping,
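
A hypothetical caller sketch for the new helper above: once a range's pages and extent state are already locked (the epd.extent_locked = 1 case), the range can be written out directly. io_tree and btrfs_get_extent stand in for whatever tree and get_extent_t callback the caller actually uses:

	ret = extent_write_locked_range(io_tree, inode, start, end,
					btrfs_get_extent, WB_SYNC_ALL);
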
@@ -2487,6 +2588,7 @@ int extent_writepages(struct extent_io_tree *tree,
                .bio = NULL,
                .tree = tree,
                .get_extent = get_extent,
+               .extent_locked = 0,
        };
 
        ret = extent_write_cache_pages(tree, mapping, wbc,
@@ -2765,14 +2867,18 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
        struct inode *inode = mapping->host;
        u64 start = iblock << inode->i_blkbits;
        sector_t sector = 0;
+       size_t blksize = (1 << inode->i_blkbits);
        struct extent_map *em;
 
-       em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
+       lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
+                   GFP_NOFS);
+       em = get_extent(inode, NULL, 0, start, blksize, 0);
+       unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
+                     GFP_NOFS);
        if (!em || IS_ERR(em))
                return 0;
 
-       if (em->block_start == EXTENT_MAP_INLINE ||
-           em->block_start == EXTENT_MAP_HOLE)
+       if (em->block_start > EXTENT_MAP_LAST_BYTE)
                goto out;
 
        sector = (em->block_start + start - em->start) >> inode->i_blkbits;
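
The block_start > EXTENT_MAP_LAST_BYTE test above subsumes the two explicit comparisons it replaces, since the special block_start values are sentinels larger than EXTENT_MAP_LAST_BYTE. For reference, the sentinel definitions (assumed here, from btrfs's extent_map.h of the same era):

	#define EXTENT_MAP_LAST_BYTE	(u64)-4
	#define EXTENT_MAP_HOLE		(u64)-3
	#define EXTENT_MAP_INLINE	(u64)-2
	#define EXTENT_MAP_DELALLOC	(u64)-1
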