Btrfs: Fix an off by one in the extent_map prepare write code
[safe/jmp/linux-2.6] / fs / btrfs / extent_map.c
index 89d0ebd..f53d8c9 100644 (file)
@@ -9,6 +9,9 @@
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
 #include <linux/swap.h>
+#include <linux/version.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
 #include "extent_map.h"
 
 /* temporary define until extent_map moves out of btrfs */
@@ -34,20 +37,39 @@ struct tree_entry {
        struct rb_node rb_node;
 };
 
-void __init extent_map_init(void)
+struct extent_page_data {      /* __extent_writepage()'s void *data */
+       struct bio *bio;        /* passed as &epd->bio to submit_extent_page() */
+       struct extent_map_tree *tree;   /* tree the writepage code operates on */
+       get_extent_t *get_extent;       /* extent lookup used while writing a page */
+};
+
+int __init extent_map_init(void)       /* 0 on success, -ENOMEM on failure */
 {
        extent_map_cache = btrfs_cache_create("extent_map",
                                            sizeof(struct extent_map), 0,
                                            NULL);
+       if (!extent_map_cache)
+               return -ENOMEM;         /* nothing created yet, nothing to undo */
        extent_state_cache = btrfs_cache_create("extent_state",
                                            sizeof(struct extent_state), 0,
                                            NULL);
+       if (!extent_state_cache)
+               goto free_map_cache;
        extent_buffer_cache = btrfs_cache_create("extent_buffers",
                                            sizeof(struct extent_buffer), 0,
                                            NULL);
+       if (!extent_buffer_cache)
+               goto free_state_cache;
+       return 0;
+
+free_state_cache:                      /* error unwind: newest cache first */
+       kmem_cache_destroy(extent_state_cache);
+free_map_cache:
+       kmem_cache_destroy(extent_map_cache);
+       return -ENOMEM;
 }
 
-void __exit extent_map_exit(void)
+void extent_map_exit(void)
 {
        struct extent_state *state;
 
@@ -73,6 +95,7 @@ void extent_map_tree_init(struct extent_map_tree *tree,
        tree->map.rb_node = NULL;
        tree->state.rb_node = NULL;
        tree->ops = NULL;
+       tree->dirty_bytes = 0;
        rwlock_init(&tree->lock);
        spin_lock_init(&tree->lru_lock);
        tree->mapping = mapping;
@@ -87,7 +110,7 @@ void extent_map_tree_empty_lru(struct extent_map_tree *tree)
        while(!list_empty(&tree->buffer_lru)) {
                eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
                                lru);
-               list_del(&eb->lru);
+               list_del_init(&eb->lru);
                free_extent_buffer(eb);
        }
 }
@@ -262,7 +285,12 @@ int add_extent_mapping(struct extent_map_tree *tree,
                if (prev && prev->end + 1 == em->start &&
                    ((em->block_start == EXTENT_MAP_HOLE &&
                      prev->block_start == EXTENT_MAP_HOLE) ||
-                            (em->block_start == prev->block_end + 1))) {
+                    (em->block_start == EXTENT_MAP_INLINE &&
+                     prev->block_start == EXTENT_MAP_INLINE) ||
+                    (em->block_start == EXTENT_MAP_DELALLOC &&
+                     prev->block_start == EXTENT_MAP_DELALLOC) ||
+                    (em->block_start < EXTENT_MAP_DELALLOC - 1 &&
+                     em->block_start == prev->block_end + 1))) {
                        em->start = prev->start;
                        em->block_start = prev->block_start;
                        rb_erase(&prev->rb_node, &tree->map);
@@ -388,6 +416,8 @@ static int insert_state(struct extent_map_tree *tree,
                printk("end < start %Lu %Lu\n", end, start);
                WARN_ON(1);
        }
+       if (bits & EXTENT_DIRTY)
+               tree->dirty_bytes += end - start + 1;
        state->state |= bits;
        state->start = start;
        state->end = end;
@@ -450,6 +480,12 @@ static int clear_state_bit(struct extent_map_tree *tree,
                            int delete)
 {
        int ret = state->state & bits;
+
+       if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
+               u64 range = state->end - state->start + 1;
+               WARN_ON(range > tree->dirty_bytes);
+               tree->dirty_bytes -= range;
+       }
        state->state &= ~bits;
        if (wake)
                wake_up(&state->wq);
@@ -642,6 +678,17 @@ out:
 }
 EXPORT_SYMBOL(wait_extent_bit);
 
+/* OR @bits into @state; a clean->dirty change grows tree->dirty_bytes. */
+static void set_state_bits(struct extent_map_tree *tree,
+                          struct extent_state *state, int bits)
+{
+       int was_dirty = !!(state->state & EXTENT_DIRTY);
+
+       state->state |= bits;
+       if (!was_dirty && (bits & EXTENT_DIRTY))
+               tree->dirty_bytes += state->end - state->start + 1;
+}
+
 /*
  * set some bits on a range in the tree.  This may require allocations
  * or sleeping, so the gfp mask is used to indicate what is allowed.
@@ -701,7 +748,7 @@ again:
                        err = -EEXIST;
                        goto out;
                }
-               state->state |= bits;
+               set_state_bits(tree, state, bits);
                start = state->end + 1;
                merge_state(tree, state);
                goto search_again;
@@ -736,7 +783,7 @@ again:
                if (err)
                        goto out;
                if (state->end <= end) {
-                       state->state |= bits;
+                       set_state_bits(tree, state, bits);
                        start = state->end + 1;
                        merge_state(tree, state);
                } else {
@@ -782,7 +829,7 @@ again:
                err = split_state(tree, state, prealloc, end + 1);
                BUG_ON(err == -EEXIST);
 
-               prealloc->state |= bits;
+               set_state_bits(tree, prealloc, bits);
                merge_state(tree, prealloc);
                prealloc = NULL;
                goto out;
@@ -1007,11 +1054,11 @@ out:
 EXPORT_SYMBOL(find_first_extent_bit);
 
 u64 find_lock_delalloc_range(struct extent_map_tree *tree,
-                            u64 start, u64 lock_start, u64 *end, u64 max_bytes)
+                            u64 *start, u64 *end, u64 max_bytes)
 {
        struct rb_node *node;
        struct extent_state *state;
-       u64 cur_start = start;
+       u64 cur_start = *start;
        u64 found = 0;
        u64 total_bytes = 0;
 
@@ -1023,37 +1070,58 @@ u64 find_lock_delalloc_range(struct extent_map_tree *tree,
 search_again:
        node = tree_search(&tree->state, cur_start);
        if (!node || IS_ERR(node)) {
+               *end = (u64)-1;
                goto out;
        }
 
        while(1) {
                state = rb_entry(node, struct extent_state, rb_node);
-               if (state->start != cur_start) {
+               if (found && state->start != cur_start) {
                        goto out;
                }
                if (!(state->state & EXTENT_DELALLOC)) {
+                       if (!found)
+                               *end = state->end;
                        goto out;
                }
-               if (state->start >= lock_start) {
-                       if (state->state & EXTENT_LOCKED) {
-                               DEFINE_WAIT(wait);
-                               atomic_inc(&state->refs);
-                               write_unlock_irq(&tree->lock);
-                               schedule();
-                               write_lock_irq(&tree->lock);
-                               finish_wait(&state->wq, &wait);
-                               free_extent_state(state);
-                               goto search_again;
+               if (!found) {
+                       struct extent_state *prev_state;
+                       struct rb_node *prev_node = node;
+                       while(1) {
+                               prev_node = rb_prev(prev_node);
+                               if (!prev_node)
+                                       break;
+                               prev_state = rb_entry(prev_node,
+                                                     struct extent_state,
+                                                     rb_node);
+                               if (!(prev_state->state & EXTENT_DELALLOC))
+                                       break;
+                               state = prev_state;
+                               node = prev_node;
                        }
-                       state->state |= EXTENT_LOCKED;
                }
+               if (state->state & EXTENT_LOCKED) {
+                       DEFINE_WAIT(wait);
+                       atomic_inc(&state->refs);
+                       prepare_to_wait(&state->wq, &wait,
+                                       TASK_UNINTERRUPTIBLE);
+                       write_unlock_irq(&tree->lock);
+                       schedule();
+                       write_lock_irq(&tree->lock);
+                       finish_wait(&state->wq, &wait);
+                       free_extent_state(state);
+                       goto search_again;
+               }
+               state->state |= EXTENT_LOCKED;
+               if (!found)
+                       *start = state->start;
                found++;
                *end = state->end;
                cur_start = state->end + 1;
                node = rb_next(node);
                if (!node)
                        break;
-               total_bytes = state->end - state->start + 1;
+               total_bytes += state->end - state->start + 1;
                if (total_bytes >= max_bytes)
                        break;
        }
@@ -1062,6 +1130,58 @@ out:
        return found;
 }
 
+u64 count_range_bits(struct extent_map_tree *tree, /* sum bytes with @bits set */
+                    u64 *start, u64 search_end, u64 max_bytes,
+                    unsigned long bits)
+{
+       struct rb_node *node;
+       struct extent_state *state;
+       u64 cur_start = *start;         /* requested start of the range */
+       u64 total_bytes = 0;
+       int found = 0;                  /* set once *start has been rewritten */
+
+       if (search_end <= cur_start) {
+               printk("search_end %Lu start %Lu\n", search_end, cur_start);
+               WARN_ON(1);
+               return 0;
+       }
+
+       write_lock_irq(&tree->lock);
+       if (cur_start == 0 && bits == EXTENT_DIRTY) {
+               total_bytes = tree->dirty_bytes;        /* tree-wide total */
+               goto out;               /* search_end/max_bytes not applied */
+       }
+       /*
+        * per the original note, this search finds the extents that end
+        * after our range starts; states with any of @bits set are summed.
+        */
+       node = tree_search(&tree->state, cur_start);
+       if (!node || IS_ERR(node)) {
+               goto out;
+       }
+
+       while(1) {
+               state = rb_entry(node, struct extent_state, rb_node);
+               if (state->start > search_end)
+                       break;
+               if (state->end >= cur_start && (state->state & bits)) {
+                       /* count only the part inside [cur_start, search_end] */
+                       total_bytes += min(search_end, state->end) + 1 -
+                                      max(cur_start, state->start);
+                       if (total_bytes >= max_bytes)
+                               break;  /* taken before *start is updated */
+                       if (!found) {
+                               *start = state->start;  /* may be < cur_start */
+                               found = 1;
+                       }
+               }
+               node = rb_next(node);
+               if (!node)
+                       break;
+       }
+out:
+       write_unlock_irq(&tree->lock);
+       return total_bytes;
+}
 /*
  * helper function to lock both pages and extents in the tree.
  * pages must be locked first.
@@ -1196,13 +1316,15 @@ int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
        node = tree_search(&tree->state, start);
        while (node && start <= end) {
                state = rb_entry(node, struct extent_state, rb_node);
-               if (state->start > end)
-                       break;
 
                if (filled && state->start > start) {
                        bitset = 0;
                        break;
                }
+
+               if (state->start > end)
+                       break;
+
                if (state->state & bits) {
                        bitset = 1;
                        if (!filled)
@@ -1228,7 +1350,7 @@ EXPORT_SYMBOL(test_range_bit);
 static int check_page_uptodate(struct extent_map_tree *tree,
                               struct page *page)
 {
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
                SetPageUptodate(page);
@@ -1242,7 +1364,7 @@ static int check_page_uptodate(struct extent_map_tree *tree,
 static int check_page_locked(struct extent_map_tree *tree,
                             struct page *page)
 {
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
                unlock_page(page);
@@ -1256,7 +1378,7 @@ static int check_page_locked(struct extent_map_tree *tree,
 static int check_page_writeback(struct extent_map_tree *tree,
                             struct page *page)
 {
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
                end_page_writeback(page);
@@ -1274,8 +1396,12 @@ static int check_page_writeback(struct extent_map_tree *tree,
  * Scheduling is not allowed, so the extent state tree is expected
  * to have one and only one object corresponding to this IO.
  */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_writepage(struct bio *bio, int err)
+#else
 static int end_bio_extent_writepage(struct bio *bio,
                                   unsigned int bytes_done, int err)
+#endif
 {
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -1284,12 +1410,15 @@ static int end_bio_extent_writepage(struct bio *bio,
        u64 end;
        int whole_page;
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        if (bio->bi_size)
                return 1;
+#endif
 
        do {
                struct page *page = bvec->bv_page;
-               start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
+               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+                        bvec->bv_offset;
                end = start + bvec->bv_len - 1;
 
                if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
@@ -1316,7 +1445,9 @@ static int end_bio_extent_writepage(struct bio *bio,
        } while (bvec >= bio->bi_io_vec);
 
        bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        return 0;
+#endif
 }
 
 /*
@@ -1330,8 +1461,12 @@ static int end_bio_extent_writepage(struct bio *bio,
  * Scheduling is not allowed, so the extent state tree is expected
  * to have one and only one object corresponding to this IO.
  */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_readpage(struct bio *bio, int err)
+#else
 static int end_bio_extent_readpage(struct bio *bio,
                                   unsigned int bytes_done, int err)
+#endif
 {
        int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -1341,12 +1476,15 @@ static int end_bio_extent_readpage(struct bio *bio,
        int whole_page;
        int ret;
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        if (bio->bi_size)
                return 1;
+#endif
 
        do {
                struct page *page = bvec->bv_page;
-               start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
+               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+                       bvec->bv_offset;
                end = start + bvec->bv_len - 1;
 
                if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
@@ -1382,7 +1520,9 @@ static int end_bio_extent_readpage(struct bio *bio,
        } while (bvec >= bio->bi_io_vec);
 
        bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        return 0;
+#endif
 }
 
 /*
@@ -1390,8 +1530,12 @@ static int end_bio_extent_readpage(struct bio *bio,
  * the structs in the extent tree when done, and set the uptodate bits
  * as appropriate.
  */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_preparewrite(struct bio *bio, int err)
+#else
 static int end_bio_extent_preparewrite(struct bio *bio,
                                       unsigned int bytes_done, int err)
+#endif
 {
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -1399,12 +1543,15 @@ static int end_bio_extent_preparewrite(struct bio *bio,
        u64 start;
        u64 end;
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        if (bio->bi_size)
                return 1;
+#endif
 
        do {
                struct page *page = bvec->bv_page;
-               start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
+               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+                       bvec->bv_offset;
                end = start + bvec->bv_len - 1;
 
                if (--bvec >= bio->bi_io_vec)
@@ -1422,43 +1569,91 @@ static int end_bio_extent_preparewrite(struct bio *bio,
        } while (bvec >= bio->bi_io_vec);
 
        bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        return 0;
+#endif
 }
 
-static int submit_extent_page(int rw, struct extent_map_tree *tree,
-                             struct page *page, sector_t sector,
-                             size_t size, unsigned long offset,
-                             struct block_device *bdev,
-                             bio_end_io_t end_io_func)
+static struct bio *
+extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+                gfp_t gfp_flags)
 {
        struct bio *bio;
-       int ret = 0;
 
-       bio = bio_alloc(GFP_NOIO, 1);
+       bio = bio_alloc(gfp_flags, nr_vecs);
 
-       bio->bi_sector = sector;
-       bio->bi_bdev = bdev;
-       bio->bi_io_vec[0].bv_page = page;
-       bio->bi_io_vec[0].bv_len = size;
-       bio->bi_io_vec[0].bv_offset = offset;
+       if (bio == NULL && (current->flags & PF_MEMALLOC)) {
+               while (!bio && (nr_vecs /= 2))
+                       bio = bio_alloc(gfp_flags, nr_vecs);
+       }
 
-       bio->bi_vcnt = 1;
-       bio->bi_idx = 0;
-       bio->bi_size = size;
+       if (bio) {
+               bio->bi_bdev = bdev;
+               bio->bi_sector = first_sector;
+       }
+       return bio;
+}
 
-       bio->bi_end_io = end_io_func;
-       bio->bi_private = tree;
+static int submit_one_bio(int rw, struct bio *bio)
+{
+       u64 maxsector;
+       int ret = 0;
 
        bio_get(bio);
-       submit_bio(rw, bio);
 
+        maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
+       if (maxsector < bio->bi_sector) {
+               printk("sector too large max %Lu got %llu\n", maxsector,
+                       (unsigned long long)bio->bi_sector);
+               WARN_ON(1);
+       }
+
+       submit_bio(rw, bio);
        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
-
        bio_put(bio);
        return ret;
 }
 
+/*
+ * Queue @size bytes of @page for IO at @sector.  A bio in *@bio_ret is grown
+ * when it ends at @sector and accepts the page; else it is submitted and a
+ * new bio (at most @max_pages vecs) replaces it, or is submitted at once if
+ * @bio_ret is NULL.  Returns 0, submit_one_bio()'s error, or -ENOMEM.
+ */
+static int submit_extent_page(int rw, struct extent_map_tree *tree,
+                             struct page *page, sector_t sector, size_t size,
+                             unsigned long offset, struct block_device *bdev,
+                             struct bio **bio_ret, unsigned long max_pages,
+                             bio_end_io_t end_io_func)
+{
+       struct bio *bio;
+       int nr, ret = 0;
+
+       if (bio_ret && *bio_ret) {
+               bio = *bio_ret;
+               if (bio->bi_sector + (bio->bi_size >> 9) == sector &&
+                   bio_add_page(bio, page, size, offset) >= size)
+                       return 0;
+               ret = submit_one_bio(rw, bio);
+               *bio_ret = NULL;        /* caller must not resubmit this bio */
+       }
+       nr = min_t(int, max_pages, bio_get_nr_vecs(bdev));
+       bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+       if (!bio) {
+               printk("failed to allocate bio nr %d\n", nr);
+               return -ENOMEM;         /* never touch a NULL bio below */
+       }
+       bio_add_page(bio, page, size, offset);
+       bio->bi_end_io = end_io_func;
+       bio->bi_private = tree;
+       if (bio_ret)
+               *bio_ret = bio;
+       else
+               ret = submit_one_bio(rw, bio);
+       return ret;
+}
+
 void set_page_extent_mapped(struct page *page)
 {
        if (!PagePrivate(page)) {
@@ -1474,11 +1669,13 @@ void set_page_extent_mapped(struct page *page)
  * into the tree that are removed when the IO is done (by the end_io
  * handlers)
  */
-int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
-                         get_extent_t *get_extent)
+static int __extent_read_full_page(struct extent_map_tree *tree,
+                                  struct page *page,
+                                  get_extent_t *get_extent,
+                                  struct bio **bio)
 {
        struct inode *inode = page->mapping->host;
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 page_end = start + PAGE_CACHE_SIZE - 1;
        u64 end;
        u64 cur = start;
@@ -1502,8 +1699,12 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
 
        while (cur <= end) {
                if (cur >= last_byte) {
+                       char *userpage;
                        iosize = PAGE_CACHE_SIZE - page_offset;
-                       zero_user_page(page, page_offset, iosize, KM_USER0);
+                       userpage = kmap_atomic(page, KM_USER0);
+                       memset(userpage + page_offset, 0, iosize);
+                       flush_dcache_page(page);
+                       kunmap_atomic(userpage, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            GFP_NOFS);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
@@ -1531,7 +1732,12 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
 
                /* we've found a hole, just zero and go on */
                if (block_start == EXTENT_MAP_HOLE) {
-                       zero_user_page(page, page_offset, iosize, KM_USER0);
+                       char *userpage;
+                       userpage = kmap_atomic(page, KM_USER0);
+                       memset(userpage + page_offset, 0, iosize);
+                       flush_dcache_page(page);
+                       kunmap_atomic(userpage, KM_USER0);
+
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            GFP_NOFS);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
@@ -1553,9 +1759,12 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
                                                          cur + iosize - 1);
                }
                if (!ret) {
+                       unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+                       nr -= page->index;
                        ret = submit_extent_page(READ, tree, page,
-                                                sector, iosize, page_offset,
-                                                bdev, end_bio_extent_readpage);
+                                        sector, iosize, page_offset,
+                                        bdev, bio, nr,
+                                        end_bio_extent_readpage);
                }
                if (ret)
                        SetPageError(page);
@@ -1570,6 +1779,18 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
        }
        return 0;
 }
+
+/* Read one page via the helper, then submit any bio it left pending. */
+int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
+                           get_extent_t *get_extent)
+{
+       struct bio *pending = NULL;
+       int err = __extent_read_full_page(tree, page, get_extent, &pending);
+
+       if (pending != NULL)
+               submit_one_bio(READ, pending);
+       return err;
+}
 EXPORT_SYMBOL(extent_read_full_page);
 
 /*
@@ -1578,25 +1799,27 @@ EXPORT_SYMBOL(extent_read_full_page);
  * are found, they are marked writeback.  Then the lock bits are removed
  * and the end_io handler clears the writeback ranges
  */
-int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
-                         get_extent_t *get_extent,
-                         struct writeback_control *wbc)
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+                             void *data)
 {
        struct inode *inode = page->mapping->host;
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       struct extent_page_data *epd = data;
+       struct extent_map_tree *tree = epd->tree;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 delalloc_start;
        u64 page_end = start + PAGE_CACHE_SIZE - 1;
        u64 end;
        u64 cur = start;
        u64 extent_offset;
        u64 last_byte = i_size_read(inode);
        u64 block_start;
+       u64 iosize;
        sector_t sector;
        struct extent_map *em;
        struct block_device *bdev;
        int ret;
        int nr = 0;
        size_t page_offset = 0;
-       size_t iosize;
        size_t blocksize;
        loff_t i_size = i_size_read(inode);
        unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -1611,32 +1834,37 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
        }
 
        if (page->index == end_index) {
+               char *userpage;
+
                size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
-               zero_user_page(page, offset,
-                              PAGE_CACHE_SIZE - offset, KM_USER0);
+
+               userpage = kmap_atomic(page, KM_USER0);
+               memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
+               flush_dcache_page(page);
+               kunmap_atomic(userpage, KM_USER0);
        }
 
        set_page_extent_mapped(page);
 
-       lock_extent(tree, start, page_end, GFP_NOFS);
-       nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
-                                              &delalloc_end,
-                                              128 * 1024 * 1024);
-       if (nr_delalloc) {
-               tree->ops->fill_delalloc(inode, start, delalloc_end);
-               if (delalloc_end >= page_end + 1) {
-                       clear_extent_bit(tree, page_end + 1, delalloc_end,
-                                        EXTENT_LOCKED | EXTENT_DELALLOC,
-                                        1, 0, GFP_NOFS);
-               }
-               clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
-                                0, 0, GFP_NOFS);
-               if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
-                       printk("found delalloc bits after clear extent_bit\n");
+       delalloc_start = start;
+       delalloc_end = 0;
+       while(delalloc_end < page_end) {
+               nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
+                                                      &delalloc_end,
+                                                      128 * 1024 * 1024);
+               if (nr_delalloc == 0) {
+                       delalloc_start = delalloc_end + 1;
+                       continue;
                }
-       } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
-               printk("found delalloc bits after find_delalloc_range returns 0\n");
+               tree->ops->fill_delalloc(inode, delalloc_start,
+                                        delalloc_end);
+               clear_extent_bit(tree, delalloc_start,
+                                delalloc_end,
+                                EXTENT_LOCKED | EXTENT_DELALLOC,
+                                1, 0, GFP_NOFS);
+               delalloc_start = delalloc_end + 1;
        }
+       lock_extent(tree, start, page_end, GFP_NOFS);
 
        end = page_end;
        if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
@@ -1656,7 +1884,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                        clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
                        break;
                }
-               em = get_extent(inode, page, page_offset, cur, end, 0);
+               em = epd->get_extent(inode, page, page_offset, cur, end, 1);
                if (IS_ERR(em) || !em) {
                        SetPageError(page);
                        break;
@@ -1699,9 +1927,18 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                if (ret)
                        SetPageError(page);
                else {
+                       unsigned long max_nr = end_index + 1;
                        set_range_writeback(tree, cur, cur + iosize - 1);
+                       if (!PageWriteback(page)) {
+                               printk("warning page %lu not writeback, "
+                                      "cur %llu end %llu\n", page->index,
+                                      (unsigned long long)cur,
+                                      (unsigned long long)end);
+                       }
+
                        ret = submit_extent_page(WRITE, tree, page, sector,
                                                 iosize, page_offset, bdev,
+                                                &epd->bio, max_nr,
                                                 end_bio_extent_writepage);
                        if (ret)
                                SetPageError(page);
@@ -1711,12 +1948,230 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                nr++;
        }
 done:
+       if (nr == 0) {
+               /* make sure the mapping tag for page dirty gets cleared */
+               set_page_writeback(page);
+               end_page_writeback(page);
+       }
        unlock_extent(tree, start, page_end, GFP_NOFS);
        unlock_page(page);
        return 0;
 }
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+
+/* Taken directly from 2.6.23 for 2.6.18 back port */
+typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
+                                void *data); /* per-page callback used by write_cache_pages() */
+
+/**
+ * write_cache_pages - walk the list of dirty pages of the given address space
+ * and write all of them.
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @writepage: function called for each page
+ * @data: data passed to writepage function
+ *
+ * If a page is already under I/O, write_cache_pages() skips it, even
+ * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them.  If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ */
+static int write_cache_pages(struct address_space *mapping,
+                     struct writeback_control *wbc, writepage_t writepage,
+                     void *data)
+{
+       struct backing_dev_info *bdi = mapping->backing_dev_info;
+       int ret = 0;
+       int done = 0;
+       struct pagevec pvec;
+       int nr_pages;
+       pgoff_t index;
+       pgoff_t end;            /* Inclusive */
+       int scanned = 0;
+       int range_whole = 0;
+       /* non-blocking caller and the bdi is congested: flag it, write nothing */
+       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+               wbc->encountered_congestion = 1;
+               return 0;
+       }
+       /* choose the page-index window [index, end] to scan */
+       pagevec_init(&pvec, 0);
+       if (wbc->range_cyclic) {
+               index = mapping->writeback_index; /* Start from prev offset */
+               end = -1; /* no upper bound (wraps to the max index if pgoff_t is unsigned) */
+       } else {
+               index = wbc->range_start >> PAGE_CACHE_SHIFT;
+               end = wbc->range_end >> PAGE_CACHE_SHIFT;
+               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+                       range_whole = 1;
+               scanned = 1; /* explicit range: suppresses the wrap-to-0 retry below */
+       }
+retry:
+       while (!done && (index <= end) && /* each lookup asks for at most PAGEVEC_SIZE dirty-tagged pages */
+              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                                             PAGECACHE_TAG_DIRTY,
+                                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+               unsigned i;
+
+               scanned = 1;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       /*
+                        * At this point we hold neither mapping->tree_lock nor
+                        * lock on the page itself: the page may be truncated or
+                        * invalidated (changing page->mapping to NULL), or even
+                        * swizzled back from swapper_space to tmpfs file
+                        * mapping
+                        */
+                       lock_page(page);
+                       /* re-check under the page lock: skip a page that left this mapping */
+                       if (unlikely(page->mapping != mapping)) {
+                               unlock_page(page);
+                               continue;
+                       }
+                       /* beyond an explicit range: finish this batch, then stop */
+                       if (!wbc->range_cyclic && page->index > end) {
+                               done = 1;
+                               unlock_page(page);
+                               continue;
+                       }
+                       /* any mode other than WB_SYNC_NONE waits for I/O already in flight */
+                       if (wbc->sync_mode != WB_SYNC_NONE)
+                               wait_on_page_writeback(page);
+                       /* skip pages still under writeback or refused by clear_page_dirty_for_io() */
+                       if (PageWriteback(page) ||
+                           !clear_page_dirty_for_io(page)) {
+                               unlock_page(page);
+                               continue;
+                       }
+                       /* page is passed locked; only the ACTIVATE case below unlocks it here */
+                       ret = (*writepage)(page, wbc, data);
+
+                       if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
+                               unlock_page(page);
+                               ret = 0; /* not reported to the caller as an error */
+                       }
+                       if (ret || (--(wbc->nr_to_write) <= 0)) /* error, or page budget used up */
+                               done = 1;
+                       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+                               wbc->encountered_congestion = 1;
+                               done = 1;
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       if (!scanned && !done) {
+               /*
+                * We hit the last page and there is more work to be done: wrap
+                * back to the start of the file
+                */
+               scanned = 1;
+               index = 0;
+               goto retry;
+       }
+       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+               mapping->writeback_index = index; /* start offset for the next range_cyclic pass */
+       return ret; /* last @writepage result, with AOP_WRITEPAGE_ACTIVATE folded to 0 */
+}
+#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
+/* Write @page, then ask write_cache_pages() for up to 64 more pages right after it. */
+int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
+                         get_extent_t *get_extent,
+                         struct writeback_control *wbc)
+{
+       int ret;
+       struct address_space *mapping = page->mapping;
+       struct extent_page_data epd = { /* one context for every page written in this call */
+               .bio = NULL,
+               .tree = tree,
+               .get_extent = get_extent,
+       };
+       struct writeback_control wbc_writepages = { /* private control block for the follow-on pages */
+               .bdi            = wbc->bdi,
+               .sync_mode      = WB_SYNC_NONE,
+               .older_than_this = NULL,
+               .nr_to_write    = 64, /* cap on the extra pages */
+               .range_start    = page_offset(page) + PAGE_CACHE_SIZE, /* first byte after @page */
+               .range_end      = (loff_t)-1, /* presumably "no upper bound" -- TODO confirm */
+       };
+
+       /* only the result of writing @page itself is returned to the caller */
+       ret = __extent_writepage(page, wbc, &epd);
+       /* follow-on writeback: its return value is not checked */
+       write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
+       if (epd.bio) { /* a bio is still pending in epd: submit it */
+               submit_one_bio(WRITE, epd.bio);
+       }
+       return ret;
+}
 EXPORT_SYMBOL(extent_write_full_page);
 
+/* Run __extent_writepage() over @mapping via write_cache_pages(), then submit any pending bio. */
+int extent_writepages(struct extent_map_tree *tree,
+                     struct address_space *mapping,
+                     get_extent_t *get_extent,
+                     struct writeback_control *wbc)
+{
+       int ret = 0;
+       struct extent_page_data epd = { /* passed to every __extent_writepage() call as its data */
+               .bio = NULL,
+               .tree = tree,
+               .get_extent = get_extent,
+       };
+       /* the caller's @wbc is used as-is; its result becomes our return value */
+       ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
+       if (epd.bio) { /* a bio is still pending in epd: submit it */
+               submit_one_bio(WRITE, epd.bio);
+       }
+       return ret;
+}
+EXPORT_SYMBOL(extent_writepages);
+/* Insert each page of @pages into @mapping's page cache, start its read, submit the bio. */
+int extent_readpages(struct extent_map_tree *tree,
+                    struct address_space *mapping,
+                    struct list_head *pages, unsigned nr_pages,
+                    get_extent_t get_extent)
+{
+       struct bio *bio = NULL;
+       unsigned page_idx;
+       struct pagevec pvec;
+       /* pvec collects pages for __pagevec_lru_add() */
+       pagevec_init(&pvec, 0);
+       for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+               struct page *page = list_entry(pages->prev, struct page, lru);
+               /* page was taken from the tail of @pages; unlink it before inserting it */
+               prefetchw(&page->flags);
+               list_del(&page->lru);
+               /*
+                * what we want to do here is call add_to_page_cache_lru,
+                * but that isn't exported, so we reproduce it here
+                */
+               if (!add_to_page_cache(page, mapping,
+                                       page->index, GFP_KERNEL)) {
+                       /* a zero return is treated as a successful insert */
+                       /* open coding of lru_cache_add, also not exported */
+                       page_cache_get(page);
+                       if (!pagevec_add(&pvec, page)) /* flush when pagevec_add() returns 0 */
+                               __pagevec_lru_add(&pvec);
+                       __extent_read_full_page(tree, page, get_extent, &bio);
+               }
+               page_cache_release(page); /* dropped whether or not the insert succeeded */
+       }
+       if (pagevec_count(&pvec)) /* flush pages still held in pvec */
+               __pagevec_lru_add(&pvec);
+       BUG_ON(!list_empty(pages)); /* @nr_pages is expected to cover the whole list */
+       if (bio)
+               submit_one_bio(READ, bio);
+       return 0; /* the result of __extent_read_full_page() is not propagated */
+}
+EXPORT_SYMBOL(extent_readpages);
+
 /*
  * basic invalidatepage code, this waits on any locked or writeback
  * ranges corresponding to the page, and then deletes any extent state
@@ -1725,7 +2180,7 @@ EXPORT_SYMBOL(extent_write_full_page);
 int extent_invalidatepage(struct extent_map_tree *tree,
                          struct page *page, unsigned long offset)
 {
-       u64 start = (page->index << PAGE_CACHE_SHIFT);
+       u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
        u64 end = start + PAGE_CACHE_SIZE - 1;
        size_t blocksize = page->mapping->host->i_sb->s_blocksize;
 
@@ -1767,7 +2222,7 @@ int extent_prepare_write(struct extent_map_tree *tree,
                         struct inode *inode, struct page *page,
                         unsigned from, unsigned to, get_extent_t *get_extent)
 {
-       u64 page_start = page->index << PAGE_CACHE_SHIFT;
+       u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
        u64 block_start;
        u64 orig_block_start;
@@ -1814,7 +2269,9 @@ int extent_prepare_write(struct extent_map_tree *tree,
                        flush_dcache_page(page);
                        kunmap_atomic(kaddr, KM_USER0);
                }
-               if (!isnew && !PageUptodate(page) &&
+               if ((em->block_start != EXTENT_MAP_HOLE &&
+                    em->block_start != EXTENT_MAP_INLINE) &&
+                   !isnew && !PageUptodate(page) &&
                    (block_off_end > to || block_off_start < from) &&
                    !test_range_bit(tree, block_start, cur_end,
                                    EXTENT_UPTODATE, 1)) {
@@ -1822,7 +2279,7 @@ int extent_prepare_write(struct extent_map_tree *tree,
                        u64 extent_offset = block_start - em->start;
                        size_t iosize;
                        sector = (em->block_start + extent_offset) >> 9;
-                       iosize = (cur_end - block_start + blocksize - 1) &
+                       iosize = (cur_end - block_start + blocksize) &
                                ~((u64)blocksize - 1);
                        /*
                         * we've already got the extent locked, but we
@@ -1834,6 +2291,7 @@ int extent_prepare_write(struct extent_map_tree *tree,
                                       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
                        ret = submit_extent_page(READ, tree, page,
                                         sector, iosize, page_offset, em->bdev,
+                                        NULL, 1,
                                         end_bio_extent_preparewrite);
                        iocount++;
                        block_start = block_start + iosize;
@@ -1865,7 +2323,7 @@ EXPORT_SYMBOL(extent_prepare_write);
 int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
 {
        struct extent_map *em;
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        u64 orig_start = start;
        int ret = 1;
@@ -1899,6 +2357,7 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
        struct inode *inode = mapping->host;
        u64 start = iblock << inode->i_blkbits;
        u64 end = start + (1 << inode->i_blkbits) - 1;
+       sector_t sector = 0;
        struct extent_map *em;
 
        em = get_extent(inode, NULL, 0, start, end, 0);
@@ -1907,9 +2366,12 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
 
        if (em->block_start == EXTENT_MAP_INLINE ||
            em->block_start == EXTENT_MAP_HOLE)
-               return 0;
+               goto out;
 
-       return (em->block_start + start - em->start) >> inode->i_blkbits;
+       sector = (em->block_start + start - em->start) >> inode->i_blkbits;
+out:
+       free_extent_map(em);
+       return sector;
 }
 
 static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
@@ -1923,7 +2385,7 @@ static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
                        rm = list_entry(tree->buffer_lru.prev,
                                        struct extent_buffer, lru);
                        tree->lru_size--;
-                       list_del(&rm->lru);
+                       list_del_init(&rm->lru);
                        free_extent_buffer(rm);
                }
        } else
@@ -1961,12 +2423,15 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb,
                                              unsigned long i)
 {
        struct page *p;
+       struct address_space *mapping;
 
        if (i == 0)
                return eb->first_page;
        i += eb->start >> PAGE_CACHE_SHIFT;
-       p = find_get_page(eb->first_page->mapping, i);
-       page_cache_release(p);
+       mapping = eb->first_page->mapping;
+       read_lock_irq(&mapping->tree_lock);
+       p = radix_tree_lookup(&mapping->page_tree, i);
+       read_unlock_irq(&mapping->tree_lock);
        return p;
 }
 
@@ -1979,25 +2444,17 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
 
        spin_lock(&tree->lru_lock);
        eb = find_lru(tree, start, len);
-       if (eb) {
-               goto lru_add;
-       }
        spin_unlock(&tree->lru_lock);
-
        if (eb) {
-               memset(eb, 0, sizeof(*eb));
-       } else {
-               eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+               return eb;
        }
+
+       eb = kmem_cache_zalloc(extent_buffer_cache, mask);
        INIT_LIST_HEAD(&eb->lru);
        eb->start = start;
        eb->len = len;
        atomic_set(&eb->refs, 1);
 
-       spin_lock(&tree->lru_lock);
-lru_add:
-       add_lru(tree, eb);
-       spin_unlock(&tree->lru_lock);
        return eb;
 }
 
@@ -2017,14 +2474,14 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
        struct extent_buffer *eb;
        struct page *p;
        struct address_space *mapping = tree->mapping;
-       int uptodate = 0;
+       int uptodate = 1;
 
        eb = __alloc_extent_buffer(tree, start, len, mask);
        if (!eb || IS_ERR(eb))
                return NULL;
 
        if (eb->flags & EXTENT_BUFFER_FILLED)
-               return eb;
+               goto lru_add;
 
        if (page0) {
                eb->first_page = page0;
@@ -2033,6 +2490,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
                page_cache_get(page0);
                mark_page_accessed(page0);
                set_page_extent_mapped(page0);
+               WARN_ON(!PageUptodate(page0));
                set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
                                 len << 2);
        } else {
@@ -2042,11 +2500,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
                p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
                if (!p) {
                        WARN_ON(1);
-                       /* make sure the free only frees the pages we've
-                        * grabbed a reference on
-                        */
-                       eb->len = i << PAGE_CACHE_SHIFT;
-                       eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
                        goto fail;
                }
                set_page_extent_mapped(p);
@@ -2065,9 +2518,25 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
        if (uptodate)
                eb->flags |= EXTENT_UPTODATE;
        eb->flags |= EXTENT_BUFFER_FILLED;
+
+lru_add:
+       spin_lock(&tree->lru_lock);
+       add_lru(tree, eb);
+       spin_unlock(&tree->lru_lock);
        return eb;
+
 fail:
-       free_extent_buffer(eb);
+       spin_lock(&tree->lru_lock);
+       list_del_init(&eb->lru);
+       spin_unlock(&tree->lru_lock);
+       if (!atomic_dec_and_test(&eb->refs))
+               return NULL;
+       for (index = 1; index < i; index++) {
+               page_cache_release(extent_buffer_page(eb, index));
+       }
+       if (i > 0)
+               page_cache_release(extent_buffer_page(eb, 0));
+       __free_extent_buffer(eb);
        return NULL;
 }
 EXPORT_SYMBOL(alloc_extent_buffer);
@@ -2077,7 +2546,8 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
                                          gfp_t mask)
 {
        unsigned long num_pages = num_extent_pages(start, len);
-       unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT;
+       unsigned long i;
+       unsigned long index = start >> PAGE_CACHE_SHIFT;
        struct extent_buffer *eb;
        struct page *p;
        struct address_space *mapping = tree->mapping;
@@ -2088,16 +2558,11 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
                return NULL;
 
        if (eb->flags & EXTENT_BUFFER_FILLED)
-               return eb;
+               goto lru_add;
 
        for (i = 0; i < num_pages; i++, index++) {
                p = find_lock_page(mapping, index);
                if (!p) {
-                       /* make sure the free only frees the pages we've
-                        * grabbed a reference on
-                        */
-                       eb->len = i << PAGE_CACHE_SHIFT;
-                       eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
                        goto fail;
                }
                set_page_extent_mapped(p);
@@ -2118,9 +2583,24 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
        if (uptodate)
                eb->flags |= EXTENT_UPTODATE;
        eb->flags |= EXTENT_BUFFER_FILLED;
+
+lru_add:
+       spin_lock(&tree->lru_lock);
+       add_lru(tree, eb);
+       spin_unlock(&tree->lru_lock);
        return eb;
 fail:
-       free_extent_buffer(eb);
+       spin_lock(&tree->lru_lock);
+       list_del_init(&eb->lru);
+       spin_unlock(&tree->lru_lock);
+       if (!atomic_dec_and_test(&eb->refs))
+               return NULL;
+       for (index = 1; index < i; index++) {
+               page_cache_release(extent_buffer_page(eb, index));
+       }
+       if (i > 0)
+               page_cache_release(extent_buffer_page(eb, 0));
+       __free_extent_buffer(eb);
        return NULL;
 }
 EXPORT_SYMBOL(find_extent_buffer);
@@ -2136,11 +2616,13 @@ void free_extent_buffer(struct extent_buffer *eb)
        if (!atomic_dec_and_test(&eb->refs))
                return;
 
+       WARN_ON(!list_empty(&eb->lru));
        num_pages = num_extent_pages(eb->start, eb->len);
 
-       for (i = 0; i < num_pages; i++) {
+       for (i = 1; i < num_pages; i++) {
                page_cache_release(extent_buffer_page(eb, i));
        }
+       page_cache_release(extent_buffer_page(eb, 0));
        __free_extent_buffer(eb);
 }
 EXPORT_SYMBOL(free_extent_buffer);
@@ -2169,8 +2651,8 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree,
                 */
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
                    ((i == num_pages - 1) &&
-                    ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
-                       start = page->index << PAGE_CACHE_SHIFT;
+                    ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
+                       start = (u64)page->index << PAGE_CACHE_SHIFT;
                        end  = start + PAGE_CACHE_SIZE - 1;
                        if (test_range_bit(tree, start, end,
                                           EXTENT_DIRTY, 0)) {
@@ -2179,6 +2661,13 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree,
                        }
                }
                clear_page_dirty_for_io(page);
+               write_lock_irq(&page->mapping->tree_lock);
+               if (!PageDirty(page)) {
+                       radix_tree_tag_clear(&page->mapping->page_tree,
+                                               page_index(page),
+                                               PAGECACHE_TAG_DIRTY);
+               }
+               write_unlock_irq(&page->mapping->tree_lock);
                unlock_page(page);
        }
        return 0;
@@ -2237,7 +2726,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree,
                page = extent_buffer_page(eb, i);
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
                    ((i == num_pages - 1) &&
-                    ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
+                    ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
                        check_page_uptodate(tree, page);
                        continue;
                }
@@ -2276,6 +2765,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree,
                           EXTENT_UPTODATE, 1)) {
                return 0;
        }
+
        if (start) {
                WARN_ON(start < eb->start);
                start_i = (start >> PAGE_CACHE_SHIFT) -
@@ -2340,9 +2830,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
-       if (i == 0)
-               offset += start_offset;
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
@@ -2353,9 +2841,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = kmap_atomic(page, KM_USER1);
                memcpy(dst, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
+               kunmap_atomic(kaddr, KM_USER1);
 
                dst += cur;
                len -= cur;
@@ -2375,7 +2863,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
        struct page *p;
        size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
        unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-       unsigned long end_i = (start_offset + start + min_len) >>
+       unsigned long end_i = (start_offset + start + min_len - 1) >>
                PAGE_CACHE_SHIFT;
 
        if (i != end_i)
@@ -2386,9 +2874,9 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
                *map_start = 0;
        } else {
                offset = 0;
-               *map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
+               *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
        }
-       if (start + min_len >= eb->len) {
+       if (start + min_len > eb->len) {
 printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
                WARN_ON(1);
        }
@@ -2450,9 +2938,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
-       if (i == 0)
-               offset += start_offset;
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
@@ -2489,18 +2975,16 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
-       if (i == 0)
-               offset += start_offset;
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = kmap_atomic(page, KM_USER1);
                memcpy(kaddr + offset, src, cur);
-               kunmap_atomic(kaddr, KM_USER0);
+               kunmap_atomic(kaddr, KM_USER1);
 
                src += cur;
                len -= cur;
@@ -2523,9 +3007,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
-       if (i == 0)
-               offset += start_offset;
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
@@ -2557,9 +3039,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 
        WARN_ON(src->len != dst_len);
 
-       offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1);
-       if (i == 0)
-               offset += start_offset;
+       offset = (start_offset + dst_offset) &
+               ((unsigned long)PAGE_CACHE_SIZE - 1);
 
        while(len > 0) {
                page = extent_buffer_page(dst, i);
@@ -2567,9 +3048,9 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 
                cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
 
-               kaddr = kmap_atomic(page, KM_USER1);
+               kaddr = kmap_atomic(page, KM_USER0);
                read_extent_buffer(src, kaddr + offset, src_offset, cur);
-               kunmap_atomic(kaddr, KM_USER1);
+               kunmap_atomic(kaddr, KM_USER0);
 
                src_offset += cur;
                len -= cur;
@@ -2639,19 +3120,14 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        }
 
        while(len > 0) {
-               dst_off_in_page = dst_offset &
+               dst_off_in_page = (start_offset + dst_offset) &
                        ((unsigned long)PAGE_CACHE_SIZE - 1);
-               src_off_in_page = src_offset &
+               src_off_in_page = (start_offset + src_offset) &
                        ((unsigned long)PAGE_CACHE_SIZE - 1);
 
                dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
                src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
 
-               if (src_i == 0)
-                       src_off_in_page += start_offset;
-               if (dst_i == 0)
-                       dst_off_in_page += start_offset;
-
                cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
                                               src_off_in_page));
                cur = min_t(unsigned long, cur,
@@ -2698,14 +3174,10 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
                dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
                src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
 
-               dst_off_in_page = dst_end &
+               dst_off_in_page = (start_offset + dst_end) &
                        ((unsigned long)PAGE_CACHE_SIZE - 1);
-               src_off_in_page = src_end &
+               src_off_in_page = (start_offset + src_end) &
                        ((unsigned long)PAGE_CACHE_SIZE - 1);
-               if (src_i == 0)
-                       src_off_in_page += start_offset;
-               if (dst_i == 0)
-                       dst_off_in_page += start_offset;
 
                cur = min_t(unsigned long, len, src_off_in_page + 1);
                cur = min(cur, dst_off_in_page + 1);