static LIST_HEAD(buffers);
static LIST_HEAD(states);
+
+#define LEAK_DEBUG 1
+#ifdef LEAK_DEBUG
static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED;
+#endif
#define BUFFER_LRU_MAX 64
struct bio *bio;
struct extent_io_tree *tree;
get_extent_t *get_extent;
+
+ /* tells writepage not to lock the state bits for this range;
+ * it still does the unlocking
+ */
+ int extent_locked;
};
int __init extent_io_init(void)
struct extent_state *alloc_extent_state(gfp_t mask)
{
struct extent_state *state;
+#ifdef LEAK_DEBUG
unsigned long flags;
+#endif
state = kmem_cache_alloc(extent_state_cache, mask);
if (!state)
state->state = 0;
state->private = 0;
state->tree = NULL;
+#ifdef LEAK_DEBUG
spin_lock_irqsave(&leak_lock, flags);
list_add(&state->leak_list, &states);
spin_unlock_irqrestore(&leak_lock, flags);
-
+#endif
atomic_set(&state->refs, 1);
init_waitqueue_head(&state->wq);
return state;
if (!state)
return;
if (atomic_dec_and_test(&state->refs)) {
+#ifdef LEAK_DEBUG
unsigned long flags;
+#endif
WARN_ON(state->tree);
+#ifdef LEAK_DEBUG
spin_lock_irqsave(&leak_lock, flags);
list_del(&state->leak_list);
spin_unlock_irqrestore(&leak_lock, flags);
+#endif
kmem_cache_free(extent_state_cache, state);
}
}
struct extent_state *other;
struct rb_node *other_node;
- if (state->state & EXTENT_IOBITS)
+ if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
return 0;
other_node = rb_prev(&state->rb_node);
}
EXPORT_SYMBOL(wait_on_extent_writeback);
+/*
+ * either insert or lock the state structs between start and end.  Use mask
+ * to tell us if waiting is desired.
+ */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{
int err;
}
EXPORT_SYMBOL(lock_extent);
+/*
+ * Attempt to set EXTENT_LOCKED on [start, end] through set_extent_bit().
+ *
+ * If set_extent_bit() reports -EEXIST, any part of the range in front of
+ * the reported failure offset has EXTENT_LOCKED cleared again and 0 is
+ * returned.  Every other result from set_extent_bit() is treated as
+ * success and 1 is returned.
+ */
+int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+		    gfp_t mask)
+{
+	u64 conflict_start;
+	int rc;
+
+	rc = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
+			    &conflict_start, mask);
+	if (rc != -EEXIST)
+		return 1;
+
+	/* back out whatever was locked ahead of the conflicting offset */
+	if (conflict_start > start)
+		clear_extent_bit(tree, start, conflict_start - 1,
+				 EXTENT_LOCKED, 1, 0, mask);
+	return 0;
+}
+EXPORT_SYMBOL(try_lock_extent);
+
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
}
EXPORT_SYMBOL(set_range_writeback);
+/*
+ * find the first offset in the io tree with 'bits' set. zero is
+ * returned if we find something, and *start_ret and *end_ret are
+ * set to reflect the state struct that was found.
+ *
+ * If nothing was found, 1 is returned, < 0 on error
+ */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, int bits)
{
}
EXPORT_SYMBOL(find_first_extent_bit);
+/* find the first state struct with 'bits' set after 'start', and
+ * return it. tree->lock must be held. NULL will be returned if
+ * nothing was found after 'start'
+ */
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
u64 start, int bits)
{
}
EXPORT_SYMBOL(find_first_extent_bit_state);
-u64 find_lock_delalloc_range(struct extent_io_tree *tree,
- u64 *start, u64 *end, u64 max_bytes)
+/*
+ * find a contiguous range of bytes in the file marked as delalloc, not
+ * more than 'max_bytes'. start and end are used to return the range,
+ *
+ * 1 is returned if we find something, 0 if nothing was in the tree
+ */
+static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
+ u64 *start, u64 *end, u64 max_bytes)
{
struct rb_node *node;
struct extent_state *state;
u64 total_bytes = 0;
spin_lock_irq(&tree->lock);
+
/*
* this search will find all the extents that end after
* our range starts.
*/
-search_again:
node = tree_search(tree, cur_start);
if (!node) {
if (!found)
while(1) {
state = rb_entry(node, struct extent_state, rb_node);
- if (found && state->start != cur_start) {
+ if (found && (state->start != cur_start ||
+ (state->state & EXTENT_BOUNDARY))) {
goto out;
}
if (!(state->state & EXTENT_DELALLOC)) {
*end = state->end;
goto out;
}
- if (!found) {
- struct extent_state *prev_state;
- struct rb_node *prev_node = node;
- while(1) {
- prev_node = rb_prev(prev_node);
- if (!prev_node)
- break;
- prev_state = rb_entry(prev_node,
- struct extent_state,
- rb_node);
- if (!(prev_state->state & EXTENT_DELALLOC))
- break;
- state = prev_state;
- node = prev_node;
- }
- }
- if (state->state & EXTENT_LOCKED) {
- DEFINE_WAIT(wait);
- atomic_inc(&state->refs);
- prepare_to_wait(&state->wq, &wait,
- TASK_UNINTERRUPTIBLE);
- spin_unlock_irq(&tree->lock);
- schedule();
- spin_lock_irq(&tree->lock);
- finish_wait(&state->wq, &wait);
- free_extent_state(state);
- goto search_again;
- }
- set_state_cb(tree, state, EXTENT_LOCKED);
- state->state |= EXTENT_LOCKED;
if (!found)
*start = state->start;
found++;
return found;
}
+/*
+ * Unlock the page cache pages of inode covering the byte range
+ * [start, end], leaving locked_page locked.  Pages are looked up in
+ * batches with find_get_pages_contig() and the reference obtained by the
+ * lookup is released for every page, locked_page included.
+ *
+ * Always returns 0.
+ */
+static noinline int __unlock_for_delalloc(struct inode *inode,
+					  struct page *locked_page,
+					  u64 start, u64 end)
+{
+	struct page *batch[16];
+	unsigned long cur = start >> PAGE_CACHE_SHIFT;
+	unsigned long last = end >> PAGE_CACHE_SHIFT;
+	unsigned long remaining = last - cur + 1;
+	int got;
+	int n;
+
+	/* the range is only locked_page itself, nothing for us to unlock */
+	if (cur == locked_page->index && last == cur)
+		return 0;
+
+	while (remaining > 0) {
+		got = find_get_pages_contig(inode->i_mapping, cur,
+					    min(remaining, ARRAY_SIZE(batch)),
+					    batch);
+		for (n = 0; n < got; n++) {
+			struct page *p = batch[n];
+
+			if (p != locked_page)
+				unlock_page(p);
+			/* drop the reference the lookup gave us */
+			page_cache_release(p);
+		}
+		remaining -= got;
+		cur += got;
+		cond_resched();
+	}
+	return 0;
+}
+
+/*
+ * Lock every page cache page of inode covering the byte range
+ * [delalloc_start, delalloc_end], other than locked_page, which the caller
+ * is responsible for.
+ *
+ * Pages are looked up in batches with find_get_pages_contig() and locked
+ * in index order.  The reference obtained by the lookup is dropped again
+ * once a page has been dealt with.
+ *
+ * Returns 0 when every page has been locked.  Returns -EAGAIN when a
+ * lookup comes back empty or a page no longer belongs to
+ * inode->i_mapping; in that case the pages counted as locked so far are
+ * unlocked again through __unlock_for_delalloc().
+ */
+static noinline int lock_delalloc_pages(struct inode *inode,
+					struct page *locked_page,
+					u64 delalloc_start,
+					u64 delalloc_end)
+{
+	unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
+	unsigned long start_index = index;
+	unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
+	unsigned long pages_locked = 0;
+	struct page *pages[16];
+	unsigned long nrpages;
+	int ret;
+	int i;
+
+	/* the caller is responsible for locking the start index */
+	if (index == locked_page->index && index == end_index)
+		return 0;
+
+	/*
+	 * walk the whole range; locked_page is recognised by pointer
+	 * inside the loop rather than skipped by index
+	 */
+	nrpages = end_index - index + 1;
+	while (nrpages > 0) {
+		ret = find_get_pages_contig(inode->i_mapping, index,
+					    min(nrpages, ARRAY_SIZE(pages)),
+					    pages);
+		if (ret == 0) {
+			ret = -EAGAIN;
+			goto done;
+		}
+		/* now we have an array of pages, lock them all */
+		for (i = 0; i < ret; i++) {
+			/*
+			 * the caller is taking responsibility for
+			 * locked_page
+			 */
+			if (pages[i] != locked_page) {
+				lock_page(pages[i]);
+				if (pages[i]->mapping != inode->i_mapping) {
+					unlock_page(pages[i]);
+					/*
+					 * release this page and the rest of
+					 * the batch; the lookup took a
+					 * reference on each of them and we
+					 * are not going to visit them again
+					 */
+					for (; i < ret; i++)
+						page_cache_release(pages[i]);
+					ret = -EAGAIN;
+					goto done;
+				}
+			}
+			page_cache_release(pages[i]);
+			pages_locked++;
+		}
+		nrpages -= ret;
+		index += ret;
+		cond_resched();
+	}
+	ret = 0;
+done:
+	/* on failure, undo the locks taken on the leading pages */
+	if (ret && pages_locked) {
+		__unlock_for_delalloc(inode, locked_page,
+			      delalloc_start,
+			      ((u64)(start_index + pages_locked - 1)) <<
+			      PAGE_CACHE_SHIFT);
+	}
+	return ret;
+}
+
+/*
+ * find a contiguous range of delalloc bytes via find_delalloc_range()
+ * (searching from *start, with 'max_bytes' as the size limit), then lock
+ * the pages covering it with lock_delalloc_pages() and lock the range in
+ * 'tree' with lock_extent().
+ *
+ * locked_page is handed through to the page locking helpers.  The start
+ * of the range that gets locked is never moved before the incoming *start.
+ *
+ * On success *start and *end are set to the locked range and the non-zero
+ * result of find_delalloc_range() is returned.  If the search found
+ * nothing, or a range ending at or before *start, *start and *end are set
+ * to what the search reported and its result is returned without locking
+ * anything.  If the pages could not be locked even after one retry with a
+ * smaller range, 0 is returned and *start / *end are left untouched.
+ */
+static noinline u64 find_lock_delalloc_range(struct inode *inode,
+ struct extent_io_tree *tree,
+ struct page *locked_page,
+ u64 *start, u64 *end,
+ u64 max_bytes)
+{
+ u64 delalloc_start;
+ u64 delalloc_end;
+ u64 found;
+ int ret;
+ int loops = 0;
+
+again:
+ /* step one, find a bunch of delalloc bytes starting at start.
+ * nothing has been locked by this function at this point (every
+ * path that jumps back here has already dropped its locks), so
+ * the early return below has nothing to undo
+ */
+ delalloc_start = *start;
+ delalloc_end = 0;
+ found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
+ max_bytes);
+ if (!found || delalloc_end <= *start) {
+ *start = delalloc_start;
+ *end = delalloc_end;
+ return found;
+ }
+
+ /*
+ * start comes from the offset of locked_page. We have to lock
+ * pages in order, so we can't process delalloc bytes before
+ * locked_page
+ */
+ if (delalloc_start < *start) {
+ delalloc_start = *start;
+ }
+
+ /*
+ * make sure to limit the number of pages we try to lock down
+ * if we're looping: once a retry has happened (loops is set), a
+ * range that is still larger than max_bytes is cut down to
+ * PAGE_CACHE_SIZE bytes starting at delalloc_start.
+ */
+ if (delalloc_end + 1 - delalloc_start > max_bytes && loops) {
+ delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+ }
+ /* step two, lock all the pages after the page that has start */
+ ret = lock_delalloc_pages(inode, locked_page,
+ delalloc_start, delalloc_end);
+ if (ret == -EAGAIN) {
+ /* some of the pages are gone, lets avoid looping by
+ * shortening the size of the delalloc range we're searching.
+ * first failure: retry once with max_bytes reduced to what is
+ * left of the page containing *start.
+ * second failure: give up and report that nothing was found.
+ */
+ if (!loops) {
+ unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
+ max_bytes = PAGE_CACHE_SIZE - offset;
+ loops = 1;
+ goto again;
+ } else {
+ found = 0;
+ goto out_failed;
+ }
+ }
+ BUG_ON(ret);
+
+ /* step three, lock the state bits for the whole range */
+ lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+
+ /* then test to make sure it is all still delalloc.  if it is not,
+ * drop the extent lock and the page locks taken above and start
+ * the search over
+ */
+ ret = test_range_bit(tree, delalloc_start, delalloc_end,
+ EXTENT_DELALLOC, 1);
+ if (!ret) {
+ unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+ __unlock_for_delalloc(inode, locked_page,
+ delalloc_start, delalloc_end);
+ cond_resched();
+ goto again;
+ }
+ *start = delalloc_start;
+ *end = delalloc_end;
+out_failed:
+ return found;
+}
+
+/*
+ * Clear a caller-selected set of state bits on [start, end] in 'tree' and
+ * then apply a caller-selected set of page operations to every page cache
+ * page of inode covering that range, except locked_page.
+ *
+ * State bits cleared (each only when its flag is non-zero):
+ *   clear_unlock   -> EXTENT_LOCKED
+ *   clear_dirty    -> EXTENT_DIRTY
+ *   clear_delalloc -> EXTENT_DELALLOC
+ *
+ * Page operations, applied in this order when the flag is non-zero:
+ *   clear_dirty    -> clear_page_dirty_for_io()
+ *   set_writeback  -> set_page_writeback()
+ *   end_writeback  -> end_page_writeback()
+ *   unlock_pages   -> unlock_page()
+ *
+ * locked_page only has the lookup reference dropped.  Always returns 0.
+ */
+int extent_clear_unlock_delalloc(struct inode *inode,
+				 struct extent_io_tree *tree,
+				 u64 start, u64 end, struct page *locked_page,
+				 int unlock_pages,
+				 int clear_unlock,
+				 int clear_delalloc, int clear_dirty,
+				 int set_writeback,
+				 int end_writeback)
+{
+	struct page *batch[16];
+	unsigned long cur = start >> PAGE_CACHE_SHIFT;
+	unsigned long last = end >> PAGE_CACHE_SHIFT;
+	unsigned long remaining = last - cur + 1;
+	int bits = 0;
+	int got;
+	int n;
+
+	bits |= clear_unlock ? EXTENT_LOCKED : 0;
+	bits |= clear_dirty ? EXTENT_DIRTY : 0;
+	bits |= clear_delalloc ? EXTENT_DELALLOC : 0;
+
+	clear_extent_bit(tree, start, end, bits, 1, 0, GFP_NOFS);
+
+	/* no per-page work requested, so skip the page cache walk */
+	if (!unlock_pages && !clear_dirty && !set_writeback && !end_writeback)
+		return 0;
+
+	while (remaining > 0) {
+		got = find_get_pages_contig(inode->i_mapping, cur,
+					    min(remaining, ARRAY_SIZE(batch)),
+					    batch);
+		for (n = 0; n < got; n++) {
+			struct page *p = batch[n];
+
+			/* locked_page is left entirely to the caller */
+			if (p != locked_page) {
+				if (clear_dirty)
+					clear_page_dirty_for_io(p);
+				if (set_writeback)
+					set_page_writeback(p);
+				if (end_writeback)
+					end_page_writeback(p);
+				if (unlock_pages)
+					unlock_page(p);
+			}
+			/* drop the reference the lookup gave us */
+			page_cache_release(p);
+		}
+		remaining -= got;
+		cur += got;
+		cond_resched();
+	}
+	return 0;
+}
+EXPORT_SYMBOL(extent_clear_unlock_delalloc);
+
+/*
+ * count the number of bytes in the tree that have a given bit(s)
+ * set. This can be fairly slow, except for EXTENT_DIRTY which is
+ * cached. The total number found is returned.
+ */
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
unsigned long bits)
}
EXPORT_SYMBOL(unlock_range);
+/*
+ * set the private field for a given byte offset in the tree. If there isn't
+ * an extent_state there already, this does nothing.
+ */
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
struct rb_node *node;
* Scheduling is not allowed, so the extent state tree is expected
* to have one and only one object corresponding to this IO.
*/
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_extent_writepage(struct bio *bio, int err)
-#else
-static int end_bio_extent_writepage(struct bio *bio,
- unsigned int bytes_done, int err)
-#endif
{
int uptodate = err == 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
int whole_page;
int ret;
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- if (bio->bi_size)
- return 1;
-#endif
do {
struct page *page = bvec->bv_page;
tree = &BTRFS_I(page->mapping->host)->io_tree;
else
check_page_writeback(tree, page);
} while (bvec >= bio->bi_io_vec);
+
bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- return 0;
-#endif
}
/*
* Scheduling is not allowed, so the extent state tree is expected
* to have one and only one object corresponding to this IO.
*/
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_extent_readpage(struct bio *bio, int err)
-#else
-static int end_bio_extent_readpage(struct bio *bio,
- unsigned int bytes_done, int err)
-#endif
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
int whole_page;
int ret;
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- if (bio->bi_size)
- return 1;
-#endif
-
do {
struct page *page = bvec->bv_page;
tree = &BTRFS_I(page->mapping->host)->io_tree;
}
}
- if (uptodate)
+ if (uptodate) {
set_extent_uptodate(tree, start, end,
GFP_ATOMIC);
+ }
unlock_extent(tree, start, end, GFP_ATOMIC);
if (whole_page) {
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- return 0;
-#endif
}
/*
* the structs in the extent tree when done, and set the uptodate bits
* as appropriate.
*/
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_extent_preparewrite(struct bio *bio, int err)
-#else
-static int end_bio_extent_preparewrite(struct bio *bio,
- unsigned int bytes_done, int err)
-#endif
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
u64 start;
u64 end;
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- if (bio->bi_size)
- return 1;
-#endif
-
do {
struct page *page = bvec->bv_page;
tree = &BTRFS_I(page->mapping->host)->io_tree;
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- return 0;
-#endif
}
static struct bio *
return bio;
}
-static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
+static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
+ unsigned long bio_flags)
{
int ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
- struct rb_node *node;
- struct extent_state *state;
u64 start;
u64 end;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
end = start + bvec->bv_len - 1;
- spin_lock_irq(&tree->lock);
- node = __etree_search(tree, start, NULL, NULL);
- BUG_ON(!node);
- state = rb_entry(node, struct extent_state, rb_node);
- while(state->end < end) {
- node = rb_next(node);
- state = rb_entry(node, struct extent_state, rb_node);
- }
- BUG_ON(state->end != end);
- spin_unlock_irq(&tree->lock);
-
bio->bi_private = NULL;
bio_get(bio);
if (tree->ops && tree->ops->submit_bio_hook)
tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
- mirror_num);
+ mirror_num, bio_flags);
else
submit_bio(rw, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
struct bio **bio_ret,
unsigned long max_pages,
bio_end_io_t end_io_func,
- int mirror_num)
+ int mirror_num,
+ unsigned long prev_bio_flags,
+ unsigned long bio_flags)
{
int ret = 0;
struct bio *bio;
int nr;
+ int contig = 0;
+ int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
+ int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
+ size_t page_size = min(size, PAGE_CACHE_SIZE);
if (bio_ret && *bio_ret) {
bio = *bio_ret;
- if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
+ if (old_compressed)
+ contig = bio->bi_sector == sector;
+ else
+ contig = bio->bi_sector + (bio->bi_size >> 9) ==
+ sector;
+
+ if (prev_bio_flags != bio_flags || !contig ||
(tree->ops && tree->ops->merge_bio_hook &&
- tree->ops->merge_bio_hook(page, offset, size, bio)) ||
- bio_add_page(bio, page, size, offset) < size) {
- ret = submit_one_bio(rw, bio, mirror_num);
+ tree->ops->merge_bio_hook(page, offset, page_size, bio,
+ bio_flags)) ||
+ bio_add_page(bio, page, page_size, offset) < page_size) {
+ ret = submit_one_bio(rw, bio, mirror_num,
+ prev_bio_flags);
bio = NULL;
} else {
return 0;
}
}
- nr = bio_get_nr_vecs(bdev);
+ if (this_compressed)
+ nr = BIO_MAX_PAGES;
+ else
+ nr = bio_get_nr_vecs(bdev);
+
bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
if (!bio) {
printk("failed to allocate bio nr %d\n", nr);
}
-
- bio_add_page(bio, page, size, offset);
+ bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
if (bio_ret) {
*bio_ret = bio;
} else {
- ret = submit_one_bio(rw, bio, mirror_num);
+ ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
}
return ret;
set_page_private(page, EXTENT_PAGE_PRIVATE);
}
}
+EXPORT_SYMBOL(set_page_extent_mapped);
void set_page_extent_head(struct page *page, unsigned long len)
{
static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
- struct bio **bio, int mirror_num)
+ struct bio **bio, int mirror_num,
+ unsigned long *bio_flags)
{
struct inode *inode = page->mapping->host;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
int nr = 0;
size_t page_offset = 0;
size_t iosize;
+ size_t disk_io_size;
size_t blocksize = inode->i_sb->s_blocksize;
+ unsigned long this_bio_flag = 0;
set_page_extent_mapped(page);
end = page_end;
lock_extent(tree, start, end, GFP_NOFS);
+ if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
+ char *userpage;
+ size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
+
+ if (zero_offset) {
+ iosize = PAGE_CACHE_SIZE - zero_offset;
+ userpage = kmap_atomic(page, KM_USER0);
+ memset(userpage + zero_offset, 0, iosize);
+ flush_dcache_page(page);
+ kunmap_atomic(userpage, KM_USER0);
+ }
+ }
while (cur <= end) {
if (cur >= last_byte) {
char *userpage;
}
BUG_ON(end < cur);
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+ this_bio_flag = EXTENT_BIO_COMPRESSED;
+
iosize = min(extent_map_end(em) - cur, end - cur + 1);
cur_end = min(extent_map_end(em) - 1, end);
iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
- sector = (em->block_start + extent_offset) >> 9;
+ if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
+ disk_io_size = em->block_len;
+ sector = em->block_start >> 9;
+ } else {
+ sector = (em->block_start + extent_offset) >> 9;
+ disk_io_size = iosize;
+ }
bdev = em->bdev;
block_start = em->block_start;
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ block_start = EXTENT_MAP_HOLE;
free_extent_map(em);
em = NULL;
}
/* the get_extent function already copied into the page */
if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+ check_page_uptodate(tree, page);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
cur = cur + iosize;
page_offset += iosize;
unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
pnr -= page->index;
ret = submit_extent_page(READ, tree, page,
- sector, iosize, page_offset,
+ sector, disk_io_size, page_offset,
bdev, bio, pnr,
- end_bio_extent_readpage, mirror_num);
+ end_bio_extent_readpage, mirror_num,
+ *bio_flags,
+ this_bio_flag);
nr++;
+ *bio_flags = this_bio_flag;
}
if (ret)
SetPageError(page);
get_extent_t *get_extent)
{
struct bio *bio = NULL;
+ unsigned long bio_flags = 0;
int ret;
- ret = __extent_read_full_page(tree, page, get_extent, &bio, 0);
+ ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
+ &bio_flags);
if (bio)
- submit_one_bio(READ, bio, 0);
+ submit_one_bio(READ, bio, 0, bio_flags);
return ret;
}
EXPORT_SYMBOL(extent_read_full_page);
unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
u64 nr_delalloc;
u64 delalloc_end;
+ int page_started;
+ int compressed;
+ unsigned long nr_written = 0;
WARN_ON(!PageLocked(page));
pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index ||
(page->index == end_index && !pg_offset)) {
- page->mapping->a_ops->invalidatepage(page, 0);
+ if (epd->extent_locked) {
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, start,
+ page_end, NULL, 1);
+ }
unlock_page(page);
return 0;
}
delalloc_start = start;
delalloc_end = 0;
- while(delalloc_end < page_end) {
- nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
+ page_started = 0;
+ if (!epd->extent_locked) {
+ while(delalloc_end < page_end) {
+ nr_delalloc = find_lock_delalloc_range(inode, tree,
+ page,
+ &delalloc_start,
&delalloc_end,
128 * 1024 * 1024);
- if (nr_delalloc == 0) {
+ if (nr_delalloc == 0) {
+ delalloc_start = delalloc_end + 1;
+ continue;
+ }
+ tree->ops->fill_delalloc(inode, page, delalloc_start,
+ delalloc_end, &page_started,
+ &nr_written);
delalloc_start = delalloc_end + 1;
- continue;
}
- tree->ops->fill_delalloc(inode, delalloc_start,
- delalloc_end);
- clear_extent_bit(tree, delalloc_start,
- delalloc_end,
- EXTENT_LOCKED | EXTENT_DELALLOC,
- 1, 0, GFP_NOFS);
- delalloc_start = delalloc_end + 1;
+
+ /* did the fill delalloc function already unlock and start
+ * the IO?
+ */
+ if (page_started) {
+ ret = 0;
+ goto update_nr_written;
+ }
}
lock_extent(tree, start, page_end, GFP_NOFS);
+
unlock_start = start;
if (tree->ops && tree->ops->writepage_start_hook) {
- ret = tree->ops->writepage_start_hook(page, start, page_end);
+ ret = tree->ops->writepage_start_hook(page, start,
+ page_end);
if (ret == -EAGAIN) {
unlock_extent(tree, start, page_end, GFP_NOFS);
redirty_page_for_writepage(wbc, page);
unlock_page(page);
- return 0;
+ ret = 0;
+ goto update_nr_written;
}
}
+ nr_written++;
+
end = page_end;
if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
printk("found delalloc bits after lock_extent\n");
sector = (em->block_start + extent_offset) >> 9;
bdev = em->bdev;
block_start = em->block_start;
+ compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
free_extent_map(em);
em = NULL;
- if (block_start == EXTENT_MAP_HOLE ||
+ /*
+ * compressed and inline extents are written through other
+ * paths in the FS
+ */
+ if (compressed || block_start == EXTENT_MAP_HOLE ||
block_start == EXTENT_MAP_INLINE) {
clear_extent_dirty(tree, cur,
cur + iosize - 1, GFP_NOFS);
unlock_extent(tree, unlock_start, cur + iosize -1,
GFP_NOFS);
- if (tree->ops && tree->ops->writepage_end_io_hook)
+ /*
+ * end_io notification does not happen here for
+ * compressed extents
+ */
+ if (!compressed && tree->ops &&
+ tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur,
cur + iosize - 1,
NULL, 1);
- cur = cur + iosize;
+ else if (compressed) {
+ /* we don't want to end_page_writeback on
+ * a compressed extent. this happens
+ * elsewhere
+ */
+ nr++;
+ }
+
+ cur += iosize;
pg_offset += iosize;
unlock_start = cur;
continue;
}
-
/* leave this out until we have a page_mkwrite call */
if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
EXTENT_DIRTY, 0)) {
pg_offset += iosize;
continue;
}
+
clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
if (tree->ops && tree->ops->writepage_io_hook) {
ret = tree->ops->writepage_io_hook(page, cur,
ret = submit_extent_page(WRITE, tree, page, sector,
iosize, pg_offset, bdev,
&epd->bio, max_nr,
- end_bio_extent_writepage, 0);
+ end_bio_extent_writepage,
+ 0, 0, 0);
if (ret)
SetPageError(page);
}
if (unlock_start <= page_end)
unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
unlock_page(page);
+
+update_nr_written:
+ wbc->nr_to_write -= nr_written;
+ if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+ wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+ page->mapping->writeback_index = page->index + nr_written;
return 0;
}
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
-/* Taken directly from 2.6.23 for 2.6.18 back port */
-typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
- void *data);
-
/**
- * write_cache_pages - walk the list of dirty pages of the given address space
- * and write all of them.
+ * extent_write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
* @mapping: address space structure to write
* @wbc: subtract the number of written pages from *@wbc->nr_to_write
* @writepage: function called for each page
* WB_SYNC_ALL then we were called for data integrity and we must wait for
* existing IO to complete.
*/
-static int write_cache_pages(struct address_space *mapping,
- struct writeback_control *wbc, writepage_t writepage,
- void *data)
+int extent_write_cache_pages(struct extent_io_tree *tree,
+ struct address_space *mapping,
+ struct writeback_control *wbc,
+ writepage_t writepage, void *data)
{
struct backing_dev_info *bdi = mapping->backing_dev_info;
int ret = 0;
* swizzled back from swapper_space to tmpfs file
* mapping
*/
- lock_page(page);
+ if (tree->ops && tree->ops->write_cache_pages_lock_hook)
+ tree->ops->write_cache_pages_lock_hook(page);
+ else
+ lock_page(page);
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
unlock_page(page);
ret = 0;
}
- if (ret || (--(wbc->nr_to_write) <= 0))
+ if (ret || wbc->nr_to_write <= 0)
done = 1;
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
}
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = index;
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;
+
+ if (wbc->range_cont)
+ wbc->range_start = index << PAGE_CACHE_SHIFT;
return ret;
}
-#endif
+EXPORT_SYMBOL(extent_write_cache_pages);
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent,
.bio = NULL,
.tree = tree,
.get_extent = get_extent,
+ .extent_locked = 0,
};
struct writeback_control wbc_writepages = {
.bdi = wbc->bdi,
ret = __extent_writepage(page, wbc, &epd);
- write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
+ extent_write_cache_pages(tree, mapping, &wbc_writepages,
+ __extent_writepage, &epd);
if (epd.bio) {
- submit_one_bio(WRITE, epd.bio, 0);
+ submit_one_bio(WRITE, epd.bio, 0, 0);
}
return ret;
}
EXPORT_SYMBOL(extent_write_full_page);
+/*
+ * Write out the pages of inode covering [start, end] one page at a time,
+ * with extent_locked set in the extent_page_data handed to
+ * __extent_writepage().
+ *
+ * A page that clear_page_dirty_for_io() reports as dirty goes through
+ * __extent_writepage().  Any other page gets the tree's
+ * writepage_end_io_hook (when one is installed) and is unlocked here.
+ * A bio left pending in the extent_page_data is submitted at the end.
+ *
+ * 'mode' becomes the sync_mode of the writeback_control used for the
+ * writes.  The return value is the result of the last
+ * __extent_writepage() call, or 0 if none was made.
+ *
+ * NOTE(review): the result of find_get_page() is used without a NULL
+ * check, so this presumably relies on the caller guaranteeing that every
+ * page in the range is present (and locked) -- confirm against callers.
+ */
+int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
+			      u64 start, u64 end, get_extent_t *get_extent,
+			      int mode)
+{
+	struct address_space *mapping = inode->i_mapping;
+	unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
+		PAGE_CACHE_SHIFT;
+	struct page *page;
+	int ret = 0;
+
+	struct extent_page_data epd = {
+		.bio = NULL,
+		.tree = tree,
+		.get_extent = get_extent,
+		.extent_locked = 1,
+	};
+	struct writeback_control wbc = {
+		.bdi		= mapping->backing_dev_info,
+		.sync_mode	= mode,
+		.older_than_this = NULL,
+		.nr_to_write	= nr_pages * 2,
+		.range_start	= start,
+		.range_end	= end + 1,
+	};
+
+	for (; start <= end; start += PAGE_CACHE_SIZE) {
+		page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+		if (!clear_page_dirty_for_io(page)) {
+			/* not dirty: signal completion and unlock here */
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, start,
+						start + PAGE_CACHE_SIZE - 1,
+						NULL, 1);
+			unlock_page(page);
+		} else {
+			ret = __extent_writepage(page, &wbc, &epd);
+		}
+		page_cache_release(page);
+	}
+
+	if (epd.bio)
+		submit_one_bio(WRITE, epd.bio, 0, 0);
+	return ret;
+}
+EXPORT_SYMBOL(extent_write_locked_range);
+
int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping,
.bio = NULL,
.tree = tree,
.get_extent = get_extent,
+ .extent_locked = 0,
};
- ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
+ ret = extent_write_cache_pages(tree, mapping, wbc,
+ __extent_writepage, &epd);
if (epd.bio) {
- submit_one_bio(WRITE, epd.bio, 0);
+ submit_one_bio(WRITE, epd.bio, 0, 0);
}
return ret;
}
struct bio *bio = NULL;
unsigned page_idx;
struct pagevec pvec;
+ unsigned long bio_flags = 0;
pagevec_init(&pvec, 0);
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
if (!pagevec_add(&pvec, page))
__pagevec_lru_add(&pvec);
__extent_read_full_page(tree, page, get_extent,
- &bio, 0);
+ &bio, 0, &bio_flags);
}
page_cache_release(page);
}
__pagevec_lru_add(&pvec);
BUG_ON(!list_empty(pages));
if (bio)
- submit_one_bio(READ, bio, 0);
+ submit_one_bio(READ, bio, 0, bio_flags);
return 0;
}
EXPORT_SYMBOL(extent_readpages);
ret = submit_extent_page(READ, tree, page,
sector, iosize, page_offset, em->bdev,
NULL, 1,
- end_bio_extent_preparewrite, 0);
+ end_bio_extent_preparewrite, 0,
+ 0, 0);
iocount++;
block_start = block_start + iosize;
} else {
}
if (!test_range_bit(tree, em->start,
extent_map_end(em) - 1,
- EXTENT_LOCKED, 0)) {
+ EXTENT_LOCKED | EXTENT_WRITEBACK |
+ EXTENT_ORDERED,
+ 0)) {
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em);
struct inode *inode = mapping->host;
u64 start = iblock << inode->i_blkbits;
sector_t sector = 0;
+ size_t blksize = (1 << inode->i_blkbits);
struct extent_map *em;
- em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
+ lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
+ GFP_NOFS);
+ em = get_extent(inode, NULL, 0, start, blksize, 0);
+ unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
+ GFP_NOFS);
if (!em || IS_ERR(em))
return 0;
- if (em->block_start == EXTENT_MAP_INLINE ||
- em->block_start == EXTENT_MAP_HOLE)
+ if (em->block_start > EXTENT_MAP_LAST_BYTE)
goto out;
sector = (em->block_start + start - em->start) >> inode->i_blkbits;
* by increasing the reference count. So we know the page must
* be in the radix tree.
*/
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
rcu_read_lock();
-#else
- read_lock_irq(&mapping->tree_lock);
-#endif
p = radix_tree_lookup(&mapping->page_tree, i);
-
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
rcu_read_unlock();
-#else
- read_unlock_irq(&mapping->tree_lock);
-#endif
+
return p;
}
gfp_t mask)
{
struct extent_buffer *eb = NULL;
+#ifdef LEAK_DEBUG
unsigned long flags;
+#endif
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
eb->start = start;
eb->len = len;
mutex_init(&eb->mutex);
+#ifdef LEAK_DEBUG
spin_lock_irqsave(&leak_lock, flags);
list_add(&eb->leak_list, &buffers);
spin_unlock_irqrestore(&leak_lock, flags);
+#endif
atomic_set(&eb->refs, 1);
return eb;
static void __free_extent_buffer(struct extent_buffer *eb)
{
+#ifdef LEAK_DEBUG
unsigned long flags;
spin_lock_irqsave(&leak_lock, flags);
list_del(&eb->leak_list);
spin_unlock_irqrestore(&leak_lock, flags);
+#endif
kmem_cache_free(extent_buffer_cache, eb);
}
if (eb) {
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
+ mark_page_accessed(eb->first_page);
return eb;
}
spin_unlock(&tree->buffer_lock);
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
+ if (eb)
+ mark_page_accessed(eb->first_page);
+
return eb;
}
EXPORT_SYMBOL(find_extent_buffer);
}
}
clear_page_dirty_for_io(page);
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
spin_lock_irq(&page->mapping->tree_lock);
-#else
- read_lock_irq(&page->mapping->tree_lock);
-#endif
if (!PageDirty(page)) {
radix_tree_tag_clear(&page->mapping->page_tree,
page_index(page),
PAGECACHE_TAG_DIRTY);
}
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
spin_unlock_irq(&page->mapping->tree_lock);
-#else
- read_unlock_irq(&page->mapping->tree_lock);
-#endif
unlock_page(page);
}
return 0;
* properly set. releasepage may drop page->private
* on us if the page isn't already dirty.
*/
+ lock_page(page);
if (i == 0) {
- lock_page(page);
set_page_extent_head(page, eb->len);
} else if (PagePrivate(page) &&
page->private != EXTENT_PAGE_PRIVATE) {
- lock_page(page);
set_page_extent_mapped(page);
- unlock_page(page);
}
__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
- if (i == 0)
- unlock_page(page);
+ set_extent_dirty(tree, page_offset(page),
+ page_offset(page) + PAGE_CACHE_SIZE -1,
+ GFP_NOFS);
+ unlock_page(page);
}
- return set_extent_dirty(tree, eb->start,
- eb->start + eb->len - 1, GFP_NOFS);
+ return 0;
}
EXPORT_SYMBOL(set_extent_buffer_dirty);
int inc_all_pages = 0;
unsigned long num_pages;
struct bio *bio = NULL;
+ unsigned long bio_flags = 0;
if (eb->flags & EXTENT_UPTODATE)
return 0;
if (all_uptodate) {
if (start_i == 0)
eb->flags |= EXTENT_UPTODATE;
+ if (ret) {
+ printk("all up to date but ret is %d\n", ret);
+ }
goto unlock_exit;
}
ClearPageError(page);
err = __extent_read_full_page(tree, page,
get_extent, &bio,
- mirror_num);
+ mirror_num, &bio_flags);
if (err) {
ret = err;
+ printk("err %d from __extent_read_full_page\n", ret);
}
} else {
unlock_page(page);
}
if (bio)
- submit_one_bio(READ, bio, mirror_num);
+ submit_one_bio(READ, bio, mirror_num, bio_flags);
if (ret || !wait) {
+ if (ret)
+ printk("ret %d wait %d returning\n", ret, wait);
return ret;
}
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
wait_on_page_locked(page);
if (!PageUptodate(page)) {
+ printk("page not uptodate after wait_on_page_locked\n");
ret = -EIO;
}
}
}
/* at this point we can safely release the extent buffer */
num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++) {
- struct page *page = extent_buffer_page(eb, i);
- page_cache_release(page);
- }
+ for (i = 0; i < num_pages; i++)
+ page_cache_release(extent_buffer_page(eb, i));
rb_erase(&eb->rb_node, &tree->buffer);
__free_extent_buffer(eb);
out: