struct extent_map_tree *tree;
get_extent_t *get_extent;
};
+
int __init extent_map_init(void)
{
extent_map_cache = btrfs_cache_create("extent_map",
return -ENOMEM;
}
-void __exit extent_map_exit(void)
+void extent_map_exit(void)
{
struct extent_state *state;
tree->map.rb_node = NULL;
tree->state.rb_node = NULL;
tree->ops = NULL;
+ tree->dirty_bytes = 0;
rwlock_init(&tree->lock);
spin_lock_init(&tree->lru_lock);
tree->mapping = mapping;
printk("end < start %Lu %Lu\n", end, start);
WARN_ON(1);
}
+ if (bits & EXTENT_DIRTY)
+ tree->dirty_bytes += end - start + 1;
state->state |= bits;
state->start = start;
state->end = end;
int delete)
{
int ret = state->state & bits;
+
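+ /* this state is losing EXTENT_DIRTY, so give its bytes back to the tree-wide count */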
+ if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
+ u64 range = state->end - state->start + 1;
+ WARN_ON(range > tree->dirty_bytes);
+ tree->dirty_bytes -= range;
+ }
state->state &= ~bits;
if (wake)
wake_up(&state->wq);
}
EXPORT_SYMBOL(wait_extent_bit);
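+/* set bits on a single extent_state, keeping tree->dirty_bytes in step with EXTENT_DIRTY */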
+static void set_state_bits(struct extent_map_tree *tree,
+ struct extent_state *state,
+ int bits)
+{
+ if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
+ u64 range = state->end - state->start + 1;
+ tree->dirty_bytes += range;
+ }
+ state->state |= bits;
+}
+
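The EXTENT_DIRTY bookkeeping above maintains one invariant: tree->dirty_bytes equals the sum of the lengths of every extent_state that carries EXTENT_DIRTY. A debug-only sketch that could verify this while tree->lock is held (hypothetical helper, not part of the patch):

static void verify_dirty_bytes(struct extent_map_tree *tree)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 total = 0;

	for (node = rb_first(&tree->state); node; node = rb_next(node)) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->state & EXTENT_DIRTY)
			total += state->end - state->start + 1;
	}
	WARN_ON(total != tree->dirty_bytes);
}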
/*
* set some bits on a range in the tree. This may require allocations
* or sleeping, so the gfp mask is used to indicate what is allowed.
err = -EEXIST;
goto out;
}
- state->state |= bits;
+ set_state_bits(tree, state, bits);
start = state->end + 1;
merge_state(tree, state);
goto search_again;
if (err)
goto out;
if (state->end <= end) {
- state->state |= bits;
+ set_state_bits(tree, state, bits);
start = state->end + 1;
merge_state(tree, state);
} else {
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
- prealloc->state |= bits;
+ set_state_bits(tree, prealloc, bits);
merge_state(tree, prealloc);
prealloc = NULL;
goto out;
search_again:
node = tree_search(&tree->state, cur_start);
if (!node || IS_ERR(node)) {
+ *end = (u64)-1;
goto out;
}
goto out;
}
if (!(state->state & EXTENT_DELALLOC)) {
+ if (!found)
+ *end = state->end;
goto out;
}
if (!found) {
}
u64 count_range_bits(struct extent_map_tree *tree,
- u64 *start, u64 max_bytes, unsigned long bits)
+ u64 *start, u64 search_end, u64 max_bytes,
+ unsigned long bits)
{
struct rb_node *node;
struct extent_state *state;
u64 total_bytes = 0;
int found = 0;
+ if (search_end <= cur_start) {
+ printk("search_end %Lu start %Lu\n", search_end, cur_start);
+ WARN_ON(1);
+ return 0;
+ }
+
write_lock_irq(&tree->lock);
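+ /* counting all dirty bytes from offset 0: just return the cached total */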
+ if (cur_start == 0 && bits == EXTENT_DIRTY) {
+ total_bytes = tree->dirty_bytes;
+ goto out;
+ }
/*
* this search will find all the extents that end after
* our range starts.
while(1) {
state = rb_entry(node, struct extent_state, rb_node);
- if ((state->state & bits)) {
- total_bytes += state->end - state->start + 1;
+ if (state->start > search_end)
+ break;
+ if (state->end >= cur_start && (state->state & bits)) {
+ total_bytes += min(search_end, state->end) + 1 -
+ max(cur_start, state->start);
if (total_bytes >= max_bytes)
break;
if (!found) {
write_unlock_irq(&tree->lock);
return total_bytes;
}
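With the extra search_end argument the walk is bounded, and a query for dirty bytes starting at offset 0 never touches the rbtree at all; it simply returns the cached tree->dirty_bytes. A sketch of such a caller (the helper name is made up for illustration, it is not part of this patch):

static u64 total_dirty_bytes(struct extent_map_tree *tree)
{
	u64 start = 0;

	/* start == 0 and bits == EXTENT_DIRTY hits the fast path above */
	return count_range_bits(tree, &start, (u64)-1, (u64)-1,
				EXTENT_DIRTY);
}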
-
/*
* helper function to lock both pages and extents in the tree.
* pages must be locked first.
static int submit_one_bio(int rw, struct bio *bio)
{
+ u64 maxsector;
int ret = 0;
+
bio_get(bio);
+
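+ /* sanity check: warn if this bio starts beyond the end of the device */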
+ maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
+ if (maxsector < bio->bi_sector) {
+ printk("sector too large max %Lu got %llu\n", maxsector,
+ (unsigned long long)bio->bi_sector);
+ WARN_ON(1);
+ }
+
submit_bio(rw, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
while (cur <= end) {
if (cur >= last_byte) {
+ char *userpage;
iosize = PAGE_CACHE_SIZE - page_offset;
- zero_user_page(page, page_offset, iosize, KM_USER0);
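+ /* this range is past the last byte of the file, just zero it in the page */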
+ userpage = kmap_atomic(page, KM_USER0);
+ memset(userpage + page_offset, 0, iosize);
+ flush_dcache_page(page);
+ kunmap_atomic(userpage, KM_USER0);
set_extent_uptodate(tree, cur, cur + iosize - 1,
GFP_NOFS);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
/* we've found a hole, just zero and go on */
if (block_start == EXTENT_MAP_HOLE) {
- zero_user_page(page, page_offset, iosize, KM_USER0);
+ char *userpage;
+ userpage = kmap_atomic(page, KM_USER0);
+ memset(userpage + page_offset, 0, iosize);
+ flush_dcache_page(page);
+ kunmap_atomic(userpage, KM_USER0);
+
set_extent_uptodate(tree, cur, cur + iosize - 1,
GFP_NOFS);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
}
if (page->index == end_index) {
+ char *userpage;
+
size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
- zero_user_page(page, offset,
- PAGE_CACHE_SIZE - offset, KM_USER0);
+
+ userpage = kmap_atomic(page, KM_USER0);
+ memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
+ flush_dcache_page(page);
+ kunmap_atomic(userpage, KM_USER0);
}
set_page_extent_mapped(page);
nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
&delalloc_end,
128 * 1024 * 1024);
- if (nr_delalloc <= 0)
- break;
+ if (nr_delalloc == 0) {
+ delalloc_start = delalloc_end + 1;
+ continue;
+ }
tree->ops->fill_delalloc(inode, delalloc_start,
delalloc_end);
clear_extent_bit(tree, delalloc_start,
return 0;
}
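The three open-coded zeroing blocks above repeat the same kmap_atomic/memset/flush_dcache_page sequence (zero_user_page() is not assumed to exist on the older kernels this code targets). If wanted, the pattern could be folded into one small static helper; a sketch, with a name chosen here for illustration only:

/* zero 'len' bytes of 'page' starting at 'offset' using an atomic kmap */
static void zero_page_range(struct page *page, unsigned long offset,
			    unsigned long len)
{
	char *userpage;

	userpage = kmap_atomic(page, KM_USER0);
	memset(userpage + offset, 0, len);
	flush_dcache_page(page);
	kunmap_atomic(userpage, KM_USER0);
}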
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+
+/* Taken directly from 2.6.23 for 2.6.18 back port */
+typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
+ void *data);
+
+/**
+ * write_cache_pages - walk the list of dirty pages of the given address space
+ * and write all of them.
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @writepage: function called for each page
+ * @data: data passed to writepage function
+ *
+ * If a page is already under I/O, write_cache_pages() skips it, even
+ * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them. If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ */
+static int write_cache_pages(struct address_space *mapping,
+ struct writeback_control *wbc, writepage_t writepage,
+ void *data)
+{
+ struct backing_dev_info *bdi = mapping->backing_dev_info;
+ int ret = 0;
+ int done = 0;
+ struct pagevec pvec;
+ int nr_pages;
+ pgoff_t index;
+ pgoff_t end; /* Inclusive */
+ int scanned = 0;
+ int range_whole = 0;
+
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+ return 0;
+ }
+
+ pagevec_init(&pvec, 0);
+ if (wbc->range_cyclic) {
+ index = mapping->writeback_index; /* Start from prev offset */
+ end = -1;
+ } else {
+ index = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;
+ scanned = 1;
+ }
+retry:
+ while (!done && (index <= end) &&
+ (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ unsigned i;
+
+ scanned = 1;
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ /*
+ * At this point we hold neither mapping->tree_lock nor
+ * lock on the page itself: the page may be truncated or
+ * invalidated (changing page->mapping to NULL), or even
+ * swizzled back from swapper_space to tmpfs file
+ * mapping
+ */
+ lock_page(page);
+
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ continue;
+ }
+
+ if (!wbc->range_cyclic && page->index > end) {
+ done = 1;
+ unlock_page(page);
+ continue;
+ }
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+
+ if (PageWriteback(page) ||
+ !clear_page_dirty_for_io(page)) {
+ unlock_page(page);
+ continue;
+ }
+
+ ret = (*writepage)(page, wbc, data);
+
+ if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
+ unlock_page(page);
+ ret = 0;
+ }
+ if (ret || (--(wbc->nr_to_write) <= 0))
+ done = 1;
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+ done = 1;
+ }
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+ if (!scanned && !done) {
+ /*
+ * We hit the last page and there is more work to be done: wrap
+ * back to the start of the file
+ */
+ scanned = 1;
+ index = 0;
+ goto retry;
+ }
+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+ mapping->writeback_index = index;
+ return ret;
+}
+#endif
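As the comment above spells out, WB_SYNC_NONE writeback may skip pages that are already under I/O, while WB_SYNC_ALL waits for them. A data-integrity caller would therefore drive extent_writepages() with a writeback_control along these lines (illustrative sketch only, not part of the patch):

static int sync_all_extent_pages(struct extent_map_tree *tree,
				 struct address_space *mapping,
				 get_extent_t *get_extent)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,	/* wait on pages already under I/O */
		.nr_to_write = LONG_MAX,	/* do not stop early */
		.range_start = 0,
		.range_end = LLONG_MAX,		/* cover the whole file */
	};

	return extent_writepages(tree, mapping, get_extent, &wbc);
}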
+
int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
get_extent_t *get_extent,
struct writeback_control *wbc)
ret = __extent_writepage(page, wbc, &epd);
write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
- if (epd.bio)
+ if (epd.bio) {
submit_one_bio(WRITE, epd.bio);
+ }
return ret;
}
EXPORT_SYMBOL(extent_write_full_page);
+
int extent_writepages(struct extent_map_tree *tree,
struct address_space *mapping,
get_extent_t *get_extent,
struct writeback_control *wbc)
{
- int ret;
+ int ret = 0;
struct extent_page_data epd = {
.bio = NULL,
.tree = tree,
};
ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
- if (epd.bio)
+ if (epd.bio) {
submit_one_bio(WRITE, epd.bio);
+ }
return ret;
}
EXPORT_SYMBOL(extent_writepages);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
}
- if (!isnew && !PageUptodate(page) &&
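+ /* only read from disk when the extent has real disk blocks (not a hole or inline data) */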
+ if ((em->block_start != EXTENT_MAP_HOLE &&
+ em->block_start != EXTENT_MAP_INLINE) &&
+ !isnew && !PageUptodate(page) &&
(block_off_end > to || block_off_start < from) &&
!test_range_bit(tree, block_start, cur_end,
EXTENT_UPTODATE, 1)) {
u64 extent_offset = block_start - em->start;
size_t iosize;
sector = (em->block_start + extent_offset) >> 9;
- iosize = (cur_end - block_start + blocksize - 1) &
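+ /* cur_end is inclusive, so this rounds the byte count up to a full block */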
+ iosize = (cur_end - block_start + blocksize) &
~((u64)blocksize - 1);
/*
* we've already got the extent locked, but we