#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
-#include <linux/version.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "extent_io.h"
#include "extent_map.h"
-
-/* temporary define until extent_map moves out of btrfs */
-struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
- unsigned long extra_flags,
- void (*ctor)(void *, struct kmem_cache *,
- unsigned long));
+#include "compat.h"
+#include "ctree.h"
+#include "btrfs_inode.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static LIST_HEAD(buffers);
static LIST_HEAD(states);
+#define LEAK_DEBUG 0
+#if LEAK_DEBUG
+static DEFINE_SPINLOCK(leak_lock);
+#endif
+
#define BUFFER_LRU_MAX 64
struct tree_entry {
struct bio *bio;
struct extent_io_tree *tree;
get_extent_t *get_extent;
+
+ /* tells writepage not to lock the state bits for this range
+ * it still does the unlocking
+ */
+ unsigned int extent_locked:1;
+
+ /* tells the submit_bio code to use a WRITE_SYNC */
+ unsigned int sync_io:1;
};
int __init extent_io_init(void)
{
- extent_state_cache = btrfs_cache_create("extent_state",
- sizeof(struct extent_state), 0,
- NULL);
+ extent_state_cache = kmem_cache_create("extent_state",
+ sizeof(struct extent_state), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_state_cache)
return -ENOMEM;
- extent_buffer_cache = btrfs_cache_create("extent_buffers",
- sizeof(struct extent_buffer), 0,
- NULL);
+ extent_buffer_cache = kmem_cache_create("extent_buffers",
+ sizeof(struct extent_buffer), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;
return 0;
void extent_io_exit(void)
{
struct extent_state *state;
+ struct extent_buffer *eb;
while (!list_empty(&states)) {
- state = list_entry(states.next, struct extent_state, list);
- printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs));
- list_del(&state->list);
+ state = list_entry(states.next, struct extent_state, leak_list);
+ printk(KERN_ERR "btrfs state leak: start %llu end %llu "
+ "state %lu in tree %p refs %d\n",
+ (unsigned long long)state->start,
+ (unsigned long long)state->end,
+ state->state, state->tree, atomic_read(&state->refs));
+ list_del(&state->leak_list);
kmem_cache_free(extent_state_cache, state);
}
+ while (!list_empty(&buffers)) {
+ eb = list_entry(buffers.next, struct extent_buffer, leak_list);
+ printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
+ "refs %d\n", (unsigned long long)eb->start,
+ eb->len, atomic_read(&eb->refs));
+ list_del(&eb->leak_list);
+ kmem_cache_free(extent_buffer_cache, eb);
+ }
if (extent_state_cache)
kmem_cache_destroy(extent_state_cache);
if (extent_buffer_cache)
struct address_space *mapping, gfp_t mask)
{
tree->state.rb_node = NULL;
+ tree->buffer.rb_node = NULL;
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
- spin_lock_init(&tree->lru_lock);
+ spin_lock_init(&tree->buffer_lock);
tree->mapping = mapping;
- INIT_LIST_HEAD(&tree->buffer_lru);
- tree->lru_size = 0;
- tree->last = NULL;
}
-EXPORT_SYMBOL(extent_io_tree_init);
-void extent_io_tree_empty_lru(struct extent_io_tree *tree)
-{
- struct extent_buffer *eb;
- while(!list_empty(&tree->buffer_lru)) {
- eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
- lru);
- list_del_init(&eb->lru);
- free_extent_buffer(eb);
- }
-}
-EXPORT_SYMBOL(extent_io_tree_empty_lru);
-
-struct extent_state *alloc_extent_state(gfp_t mask)
+static struct extent_state *alloc_extent_state(gfp_t mask)
{
struct extent_state *state;
+#if LEAK_DEBUG
+ unsigned long flags;
+#endif
state = kmem_cache_alloc(extent_state_cache, mask);
- if (!state || IS_ERR(state))
+ if (!state)
return state;
state->state = 0;
state->private = 0;
state->tree = NULL;
-
+#if LEAK_DEBUG
+ spin_lock_irqsave(&leak_lock, flags);
+ list_add(&state->leak_list, &states);
+ spin_unlock_irqrestore(&leak_lock, flags);
+#endif
atomic_set(&state->refs, 1);
init_waitqueue_head(&state->wq);
return state;
}
-EXPORT_SYMBOL(alloc_extent_state);
-void free_extent_state(struct extent_state *state)
+static void free_extent_state(struct extent_state *state)
{
if (!state)
return;
if (atomic_dec_and_test(&state->refs)) {
+#if LEAK_DEBUG
+ unsigned long flags;
+#endif
WARN_ON(state->tree);
+#if LEAK_DEBUG
+ spin_lock_irqsave(&leak_lock, flags);
+ list_del(&state->leak_list);
+ spin_unlock_irqrestore(&leak_lock, flags);
+#endif
kmem_cache_free(extent_state_cache, state);
}
}
-EXPORT_SYMBOL(free_extent_state);
static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
struct rb_node *node)
{
- struct rb_node ** p = &root->rb_node;
- struct rb_node * parent = NULL;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
struct tree_entry *entry;
- while(*p) {
+ while (*p) {
parent = *p;
entry = rb_entry(parent, struct tree_entry, rb_node);
struct rb_node **next_ret)
{
struct rb_root *root = &tree->state;
- struct rb_node * n = root->rb_node;
+ struct rb_node *n = root->rb_node;
struct rb_node *prev = NULL;
struct rb_node *orig_prev = NULL;
struct tree_entry *entry;
struct tree_entry *prev_entry = NULL;
- if (tree->last) {
- struct extent_state *state;
- state = tree->last;
- if (state->start <= offset && offset <= state->end)
- return &tree->last->rb_node;
- }
- while(n) {
+ while (n) {
entry = rb_entry(n, struct tree_entry, rb_node);
prev = n;
prev_entry = entry;
n = n->rb_left;
else if (offset > entry->end)
n = n->rb_right;
- else {
- tree->last = rb_entry(n, struct extent_state, rb_node);
+ else
return n;
- }
}
if (prev_ret) {
orig_prev = prev;
- while(prev && offset > prev_entry->end) {
+ while (prev && offset > prev_entry->end) {
prev = rb_next(prev);
prev_entry = rb_entry(prev, struct tree_entry, rb_node);
}
if (next_ret) {
prev_entry = rb_entry(prev, struct tree_entry, rb_node);
- while(prev && offset < prev_entry->start) {
+ while (prev && offset < prev_entry->start) {
prev = rb_prev(prev);
prev_entry = rb_entry(prev, struct tree_entry, rb_node);
}
struct rb_node *ret;
ret = __etree_search(tree, offset, &prev, NULL);
- if (!ret) {
- if (prev) {
- tree->last = rb_entry(prev, struct extent_state,
- rb_node);
- }
+ if (!ret)
return prev;
- }
return ret;
}
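+/*
+ * the buffer tree is an rbtree of extent_buffers indexed by eb->start.
+ * buffer_tree_insert links a new node in, or returns the buffer that
+ * already occupies that offset; buffer_search just does the lookup.
+ */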
+static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
+ u64 offset, struct rb_node *node)
+{
+ struct rb_root *root = &tree->buffer;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct extent_buffer *eb;
+
+ while (*p) {
+ parent = *p;
+ eb = rb_entry(parent, struct extent_buffer, rb_node);
+
+ if (offset < eb->start)
+ p = &(*p)->rb_left;
+ else if (offset > eb->start)
+ p = &(*p)->rb_right;
+ else
+ return eb;
+ }
+
+ rb_link_node(node, parent, p);
+ rb_insert_color(node, root);
+ return NULL;
+}
+
+static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
+ u64 offset)
+{
+ struct rb_root *root = &tree->buffer;
+ struct rb_node *n = root->rb_node;
+ struct extent_buffer *eb;
+
+ while (n) {
+ eb = rb_entry(n, struct extent_buffer, rb_node);
+ if (offset < eb->start)
+ n = n->rb_left;
+ else if (offset > eb->start)
+ n = n->rb_right;
+ else
+ return eb;
+ }
+ return NULL;
+}
+
+static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
+ struct extent_state *other)
+{
+ if (tree->ops && tree->ops->merge_extent_hook)
+ tree->ops->merge_extent_hook(tree->mapping->host, new,
+ other);
+}
+
/*
* utility function to look for merge candidates inside a given range.
* Any extents with matching state are merged together into a single
struct extent_state *other;
struct rb_node *other_node;
- if (state->state & EXTENT_IOBITS)
+ if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
return 0;
other_node = rb_prev(&state->rb_node);
other = rb_entry(other_node, struct extent_state, rb_node);
if (other->end == state->start - 1 &&
other->state == state->state) {
+ merge_cb(tree, state, other);
state->start = other->start;
other->tree = NULL;
- if (tree->last == other)
- tree->last = NULL;
rb_erase(&other->rb_node, &tree->state);
free_extent_state(other);
}
other = rb_entry(other_node, struct extent_state, rb_node);
if (other->start == state->end + 1 &&
other->state == state->state) {
+ merge_cb(tree, state, other);
other->start = state->start;
state->tree = NULL;
- if (tree->last == state)
- tree->last = NULL;
rb_erase(&state->rb_node, &tree->state);
free_extent_state(state);
+ state = NULL;
}
}
+
return 0;
}
-static void set_state_cb(struct extent_io_tree *tree,
+static int set_state_cb(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long bits)
{
if (tree->ops && tree->ops->set_bit_hook) {
- tree->ops->set_bit_hook(tree->mapping->host, state->start,
- state->end, state->state, bits);
+ return tree->ops->set_bit_hook(tree->mapping->host,
+ state->start, state->end,
+ state->state, bits);
}
+
+ return 0;
}
static void clear_state_cb(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long bits)
{
- if (tree->ops && tree->ops->set_bit_hook) {
- tree->ops->clear_bit_hook(tree->mapping->host, state->start,
- state->end, state->state, bits);
- }
+ if (tree->ops && tree->ops->clear_bit_hook)
+ tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
}
/*
int bits)
{
struct rb_node *node;
+ int ret;
if (end < start) {
- printk("end < start %Lu %Lu\n", end, start);
+ printk(KERN_ERR "btrfs end < start %llu %llu\n",
+ (unsigned long long)end,
+ (unsigned long long)start);
WARN_ON(1);
}
+ state->start = start;
+ state->end = end;
+ ret = set_state_cb(tree, state, bits);
+ if (ret)
+ return ret;
+
if (bits & EXTENT_DIRTY)
tree->dirty_bytes += end - start + 1;
- set_state_cb(tree, state, bits);
state->state |= bits;
- state->start = start;
- state->end = end;
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
found = rb_entry(node, struct extent_state, rb_node);
- printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
+ printk(KERN_ERR "btrfs found node %llu %llu on insert of "
+ "%llu %llu\n", (unsigned long long)found->start,
+ (unsigned long long)found->end,
+ (unsigned long long)start, (unsigned long long)end);
free_extent_state(state);
return -EEXIST;
}
state->tree = tree;
- tree->last = state;
merge_state(tree, state);
return 0;
}
+static int split_cb(struct extent_io_tree *tree, struct extent_state *orig,
+ u64 split)
+{
+ if (tree->ops && tree->ops->split_extent_hook)
+ return tree->ops->split_extent_hook(tree->mapping->host,
+ orig, split);
+ return 0;
+}
+
/*
* split a given extent state struct in two, inserting the preallocated
* struct 'prealloc' as the newly created second half. 'split' indicates an
struct extent_state *prealloc, u64 split)
{
struct rb_node *node;
+
+ split_cb(tree, orig, split);
+
prealloc->start = orig->start;
prealloc->end = split - 1;
prealloc->state = orig->state;
node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
if (node) {
- struct extent_state *found;
- found = rb_entry(node, struct extent_state, rb_node);
- printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
free_extent_state(prealloc);
return -EEXIST;
}
struct extent_state *state, int bits, int wake,
int delete)
{
- int ret = state->state & bits;
+ int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING;
+ int ret = state->state & bits_to_clear;
if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes -= range;
}
clear_state_cb(tree, state, bits);
- state->state &= ~bits;
+ state->state &= ~bits_to_clear;
if (wake)
wake_up(&state->wq);
if (delete || state->state == 0) {
if (state->tree) {
clear_state_cb(tree, state, state->state);
- if (tree->last == state)
- tree->last = NULL;
rb_erase(&state->rb_node, &tree->state);
state->tree = NULL;
free_extent_state(state);
* bits were already set, or zero if none of the bits were already set.
*/
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int wake, int delete, gfp_t mask)
+ int bits, int wake, int delete,
+ struct extent_state **cached_state,
+ gfp_t mask)
{
struct extent_state *state;
+ struct extent_state *cached;
struct extent_state *prealloc = NULL;
+ struct rb_node *next_node;
struct rb_node *node;
- unsigned long flags;
+ u64 last_end;
int err;
int set = 0;
return -ENOMEM;
}
- spin_lock_irqsave(&tree->lock, flags);
+ spin_lock(&tree->lock);
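+ /*
+ * if the caller passed in a cached extent state that is still in the
+ * tree and starts at 'start', use it and skip the rbtree search
+ */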
+ if (cached_state) {
+ cached = *cached_state;
+ *cached_state = NULL;
+ cached_state = NULL;
+ if (cached && cached->tree && cached->start == start) {
+ atomic_dec(&cached->refs);
+ state = cached;
+ goto hit_next;
+ }
+ free_extent_state(cached);
+ }
/*
* this search will find the extents that end after
* our range starts
if (!node)
goto out;
state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
if (state->start > end)
goto out;
WARN_ON(state->end < start);
+ last_end = state->end;
/*
* | ---- desired range ---- |
if (err)
goto out;
if (state->end <= end) {
- start = state->end + 1;
- set |= clear_state_bit(tree, state, bits,
- wake, delete);
- } else {
- start = state->start;
+ set |= clear_state_bit(tree, state, bits, wake,
+ delete);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
}
goto search_again;
}
prealloc = alloc_extent_state(GFP_ATOMIC);
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
-
if (wake)
wake_up(&state->wq);
- set |= clear_state_bit(tree, prealloc, bits,
- wake, delete);
+
+ set |= clear_state_bit(tree, prealloc, bits, wake, delete);
+
prealloc = NULL;
goto out;
}
- start = state->end + 1;
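+ /*
+ * peek at the next state so runs of adjacent extents can be cleared
+ * without searching the tree again from the root
+ */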
+ if (state->end < end && prealloc && !need_resched())
+ next_node = rb_next(&state->rb_node);
+ else
+ next_node = NULL;
+
set |= clear_state_bit(tree, state, bits, wake, delete);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
+ if (start <= end && next_node) {
+ state = rb_entry(next_node, struct extent_state,
+ rb_node);
+ if (state->start == start)
+ goto hit_next;
+ }
goto search_again;
out:
- spin_unlock_irqrestore(&tree->lock, flags);
+ spin_unlock(&tree->lock);
if (prealloc)
free_extent_state(prealloc);
search_again:
if (start > end)
goto out;
- spin_unlock_irqrestore(&tree->lock, flags);
+ spin_unlock(&tree->lock);
if (mask & __GFP_WAIT)
cond_resched();
goto again;
}
-EXPORT_SYMBOL(clear_extent_bit);
static int wait_on_state(struct extent_io_tree *tree,
struct extent_state *state)
+ __releases(tree->lock)
+ __acquires(tree->lock)
{
DEFINE_WAIT(wait);
prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock_irq(&tree->lock);
+ spin_unlock(&tree->lock);
schedule();
- spin_lock_irq(&tree->lock);
+ spin_lock(&tree->lock);
finish_wait(&state->wq, &wait);
return 0;
}
struct extent_state *state;
struct rb_node *node;
- spin_lock_irq(&tree->lock);
+ spin_lock(&tree->lock);
again:
while (1) {
/*
break;
if (need_resched()) {
- spin_unlock_irq(&tree->lock);
+ spin_unlock(&tree->lock);
cond_resched();
- spin_lock_irq(&tree->lock);
+ spin_lock(&tree->lock);
}
}
out:
- spin_unlock_irq(&tree->lock);
+ spin_unlock(&tree->lock);
return 0;
}
-EXPORT_SYMBOL(wait_extent_bit);
-static void set_state_bits(struct extent_io_tree *tree,
+static int set_state_bits(struct extent_io_tree *tree,
struct extent_state *state,
int bits)
{
+ int ret;
+
+ ret = set_state_cb(tree, state, bits);
+ if (ret)
+ return ret;
+
if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes += range;
}
- set_state_cb(tree, state, bits);
state->state |= bits;
+
+ return 0;
+}
+
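+/*
+ * remember a state struct in *cached_ptr so a later lookup for the same
+ * range can skip the rbtree search.  Only states carrying IO or boundary
+ * bits are cached, since merge_state() will not merge those away.
+ */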
+static void cache_state(struct extent_state *state,
+ struct extent_state **cached_ptr)
+{
+ if (cached_ptr && !(*cached_ptr)) {
+ if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
+ *cached_ptr = state;
+ atomic_inc(&state->refs);
+ }
+ }
}
/*
- * set some bits on a range in the tree. This may require allocations
- * or sleeping, so the gfp mask is used to indicate what is allowed.
+ * set some bits on a range in the tree. This may require allocations or
+ * sleeping, so the gfp mask is used to indicate what is allowed.
*
- * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
- * range already has the desired bits set. The start of the existing
- * range is returned in failed_start in this case.
+ * If any of the exclusive bits are set, this will fail with -EEXIST if some
+ * part of the range already has the desired bits set. The start of the
+ * existing range is returned in failed_start in this case.
*
- * [start, end] is inclusive
- * This takes the tree lock.
+ * [start, end] is inclusive. This takes the tree lock.
*/
-int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
- int exclusive, u64 *failed_start, gfp_t mask)
+
+static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, int exclusive_bits, u64 *failed_start,
+ struct extent_state **cached_state,
+ gfp_t mask)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
struct rb_node *node;
- unsigned long flags;
int err = 0;
- int set;
u64 last_start;
u64 last_end;
+
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
return -ENOMEM;
}
- spin_lock_irqsave(&tree->lock, flags);
+ spin_lock(&tree->lock);
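+ /*
+ * a cached state from an earlier call lets us skip straight to the
+ * extent that starts this range, if it is still in the tree
+ */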
+ if (cached_state && *cached_state) {
+ state = *cached_state;
+ if (state->start == start && state->tree) {
+ node = &state->rb_node;
+ goto hit_next;
+ }
+ }
/*
* this search will find all the extents that end after
* our range starts.
BUG_ON(err == -EEXIST);
goto out;
}
-
state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
last_start = state->start;
last_end = state->end;
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
- set = state->state & bits;
- if (set && exclusive) {
+ struct rb_node *next_node;
+ if (state->state & exclusive_bits) {
*failed_start = state->start;
err = -EEXIST;
goto out;
}
- set_state_bits(tree, state, bits);
- start = state->end + 1;
+
+ err = set_state_bits(tree, state, bits);
+ if (err)
+ goto out;
+
+ cache_state(state, cached_state);
merge_state(tree, state);
+ if (last_end == (u64)-1)
+ goto out;
+
+ start = last_end + 1;
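+ /*
+ * if the next extent in the tree starts right where this one ended,
+ * set its bits here instead of searching again from the root
+ */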
+ if (start < end && prealloc && !need_resched()) {
+ next_node = rb_next(node);
+ if (next_node) {
+ state = rb_entry(next_node, struct extent_state,
+ rb_node);
+ if (state->start == start)
+ goto hit_next;
+ }
+ }
goto search_again;
}
* desired bit on it.
*/
if (state->start < start) {
- set = state->state & bits;
- if (exclusive && set) {
+ if (state->state & exclusive_bits) {
*failed_start = start;
err = -EEXIST;
goto out;
if (err)
goto out;
if (state->end <= end) {
- set_state_bits(tree, state, bits);
- start = state->end + 1;
+ err = set_state_bits(tree, state, bits);
+ if (err)
+ goto out;
+ cache_state(state, cached_state);
merge_state(tree, state);
- } else {
- start = state->start;
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
}
goto search_again;
}
if (end < last_start)
this_end = end;
else
- this_end = last_start -1;
+ this_end = last_start - 1;
err = insert_state(tree, prealloc, start, this_end,
bits);
- prealloc = NULL;
BUG_ON(err == -EEXIST);
- if (err)
+ if (err) {
+ prealloc = NULL;
goto out;
+ }
+ cache_state(prealloc, cached_state);
+ prealloc = NULL;
start = this_end + 1;
goto search_again;
}
* on the first half
*/
if (state->start <= end && state->end > end) {
- set = state->state & bits;
- if (exclusive && set) {
+ if (state->state & exclusive_bits) {
*failed_start = start;
err = -EEXIST;
goto out;
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
- set_state_bits(tree, prealloc, bits);
+ err = set_state_bits(tree, prealloc, bits);
+ if (err) {
+ prealloc = NULL;
+ goto out;
+ }
+ cache_state(prealloc, cached_state);
merge_state(tree, prealloc);
prealloc = NULL;
goto out;
goto search_again;
out:
- spin_unlock_irqrestore(&tree->lock, flags);
+ spin_unlock(&tree->lock);
if (prealloc)
free_extent_state(prealloc);
search_again:
if (start > end)
goto out;
- spin_unlock_irqrestore(&tree->lock, flags);
+ spin_unlock(&tree->lock);
if (mask & __GFP_WAIT)
cond_resched();
goto again;
}
-EXPORT_SYMBOL(set_extent_bit);
/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
- mask);
+ NULL, mask);
}
-EXPORT_SYMBOL(set_extent_dirty);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{
return set_extent_bit(tree, start, end, bits, 0, NULL,
- mask);
+ NULL, mask);
}
-EXPORT_SYMBOL(set_extent_bits);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{
- return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
+ return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
}
-EXPORT_SYMBOL(clear_extent_bits);
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
- mask);
+ EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
+ 0, NULL, NULL, mask);
}
-EXPORT_SYMBOL(set_extent_delalloc);
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return clear_extent_bit(tree, start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0,
+ NULL, mask);
}
-EXPORT_SYMBOL(clear_extent_dirty);
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
- mask);
+ NULL, mask);
}
-EXPORT_SYMBOL(set_extent_new);
-int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
- return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
+ return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
+ NULL, mask);
}
-EXPORT_SYMBOL(clear_extent_new);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
- mask);
-}
-EXPORT_SYMBOL(set_extent_uptodate);
-
-int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
-}
-EXPORT_SYMBOL(clear_extent_uptodate);
-
-int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
- 0, NULL, mask);
+ NULL, mask);
}
-EXPORT_SYMBOL(set_extent_writeback);
-int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
+static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask)
{
- return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
+ return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
+ NULL, mask);
}
-EXPORT_SYMBOL(clear_extent_writeback);
int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}
-EXPORT_SYMBOL(wait_on_extent_writeback);
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+/*
+ * either insert or lock state struct between start and end. Use mask to tell
+ * us if waiting is desired.
+ */
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, struct extent_state **cached_state, gfp_t mask)
{
int err;
u64 failed_start;
while (1) {
- err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
- &failed_start, mask);
+ err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
+ EXTENT_LOCKED, &failed_start,
+ cached_state, mask);
if (err == -EEXIST && (mask & __GFP_WAIT)) {
wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
start = failed_start;
}
return err;
}
-EXPORT_SYMBOL(lock_extent);
+
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+{
+ return lock_extent_bits(tree, start, end, 0, NULL, mask);
+}
+
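+/*
+ * try to take EXTENT_LOCKED on [start, end] without waiting.  Returns 1
+ * if the whole range was locked; on a conflict any partially locked
+ * prefix is unlocked again and 0 is returned.
+ */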
+int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask)
+{
+ int err;
+ u64 failed_start;
+
+ err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
+ &failed_start, NULL, mask);
+ if (err == -EEXIST) {
+ if (failed_start > start)
+ clear_extent_bit(tree, start, failed_start - 1,
+ EXTENT_LOCKED, 1, 0, NULL, mask);
+ return 0;
+ }
+ return 1;
+}
+
+int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
+ mask);
+}
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
+ mask);
}
-EXPORT_SYMBOL(unlock_extent);
/*
* helper function to set pages and extents in the tree dirty
page_cache_release(page);
index++;
}
- set_extent_dirty(tree, start, end, GFP_NOFS);
return 0;
}
-EXPORT_SYMBOL(set_range_dirty);
/*
* helper function to set both pages and extents in the tree writeback
*/
-int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
page_cache_release(page);
index++;
}
- set_extent_writeback(tree, start, end, GFP_NOFS);
return 0;
}
-EXPORT_SYMBOL(set_range_writeback);
+/*
+ * find the first offset in the io tree with 'bits' set. zero is
+ * returned if we find something, and *start_ret and *end_ret are
+ * set to reflect the state struct that was found.
+ *
+ * If nothing was found, 1 is returned, < 0 on error
+ */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, int bits)
{
struct extent_state *state;
int ret = 1;
- spin_lock_irq(&tree->lock);
+ spin_lock(&tree->lock);
/*
* this search will find all the extents that end after
* our range starts.
*/
node = tree_search(tree, start);
- if (!node || IS_ERR(node)) {
+ if (!node)
goto out;
- }
- while(1) {
+ while (1) {
state = rb_entry(node, struct extent_state, rb_node);
if (state->end >= start && (state->state & bits)) {
*start_ret = state->start;
break;
}
out:
- spin_unlock_irq(&tree->lock);
+ spin_unlock(&tree->lock);
return ret;
}
-EXPORT_SYMBOL(find_first_extent_bit);
-u64 find_lock_delalloc_range(struct extent_io_tree *tree,
- u64 *start, u64 *end, u64 max_bytes)
+/* find the first state struct with 'bits' set after 'start', and
+ * return it. tree->lock must be held. NULL will be returned if
+ * nothing was found after 'start'
+ */
+struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
+ u64 start, int bits)
+{
+ struct rb_node *node;
+ struct extent_state *state;
+
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = tree_search(tree, start);
+ if (!node)
+ goto out;
+
+ while (1) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->end >= start && (state->state & bits))
+ return state;
+
+ node = rb_next(node);
+ if (!node)
+ break;
+ }
+out:
+ return NULL;
+}
+
+/*
+ * find a contiguous range of bytes in the file marked as delalloc, not
+ * more than 'max_bytes'. start and end are used to return the range.
+ *
+ * 1 is returned if we find something, 0 if nothing was in the tree
+ */
+static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
+ u64 *start, u64 *end, u64 max_bytes)
{
struct rb_node *node;
struct extent_state *state;
u64 found = 0;
u64 total_bytes = 0;
- spin_lock_irq(&tree->lock);
+ spin_lock(&tree->lock);
+
/*
* this search will find all the extents that end after
* our range starts.
*/
-search_again:
node = tree_search(tree, cur_start);
- if (!node || IS_ERR(node)) {
- *end = (u64)-1;
+ if (!node) {
+ if (!found)
+ *end = (u64)-1;
goto out;
}
- while(1) {
+ while (1) {
state = rb_entry(node, struct extent_state, rb_node);
- if (found && state->start != cur_start) {
+ if (found && (state->start != cur_start ||
+ (state->state & EXTENT_BOUNDARY))) {
goto out;
}
if (!(state->state & EXTENT_DELALLOC)) {
*end = state->end;
goto out;
}
- if (!found) {
- struct extent_state *prev_state;
- struct rb_node *prev_node = node;
- while(1) {
- prev_node = rb_prev(prev_node);
- if (!prev_node)
- break;
- prev_state = rb_entry(prev_node,
- struct extent_state,
- rb_node);
- if (!(prev_state->state & EXTENT_DELALLOC))
- break;
- state = prev_state;
- node = prev_node;
- }
- }
- if (state->state & EXTENT_LOCKED) {
- DEFINE_WAIT(wait);
- atomic_inc(&state->refs);
- prepare_to_wait(&state->wq, &wait,
- TASK_UNINTERRUPTIBLE);
- spin_unlock_irq(&tree->lock);
- schedule();
- spin_lock_irq(&tree->lock);
- finish_wait(&state->wq, &wait);
- free_extent_state(state);
- goto search_again;
- }
- set_state_cb(tree, state, EXTENT_LOCKED);
- state->state |= EXTENT_LOCKED;
if (!found)
*start = state->start;
found++;
break;
}
out:
- spin_unlock_irq(&tree->lock);
+ spin_unlock(&tree->lock);
return found;
}
-u64 count_range_bits(struct extent_io_tree *tree,
- u64 *start, u64 search_end, u64 max_bytes,
- unsigned long bits)
+static noinline int __unlock_for_delalloc(struct inode *inode,
+ struct page *locked_page,
+ u64 start, u64 end)
{
- struct rb_node *node;
- struct extent_state *state;
- u64 cur_start = *start;
- u64 total_bytes = 0;
- int found = 0;
+ int ret;
+ struct page *pages[16];
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ unsigned long nr_pages = end_index - index + 1;
+ int i;
- if (search_end <= cur_start) {
- printk("search_end %Lu start %Lu\n", search_end, cur_start);
- WARN_ON(1);
+ if (index == locked_page->index && end_index == index)
return 0;
- }
- spin_lock_irq(&tree->lock);
- if (cur_start == 0 && bits == EXTENT_DIRTY) {
- total_bytes = tree->dirty_bytes;
- goto out;
- }
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, cur_start);
- if (!node || IS_ERR(node)) {
- goto out;
+ while (nr_pages > 0) {
+ ret = find_get_pages_contig(inode->i_mapping, index,
+ min_t(unsigned long, nr_pages,
+ ARRAY_SIZE(pages)), pages);
+ for (i = 0; i < ret; i++) {
+ if (pages[i] != locked_page)
+ unlock_page(pages[i]);
+ page_cache_release(pages[i]);
+ }
+ nr_pages -= ret;
+ index += ret;
+ cond_resched();
}
+ return 0;
+}
- while(1) {
- state = rb_entry(node, struct extent_state, rb_node);
- if (state->start > search_end)
- break;
- if (state->end >= cur_start && (state->state & bits)) {
- total_bytes += min(search_end, state->end) + 1 -
- max(cur_start, state->start);
- if (total_bytes >= max_bytes)
- break;
- if (!found) {
- *start = state->start;
- found = 1;
+static noinline int lock_delalloc_pages(struct inode *inode,
+ struct page *locked_page,
+ u64 delalloc_start,
+ u64 delalloc_end)
+{
+ unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
+ unsigned long start_index = index;
+ unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
+ unsigned long pages_locked = 0;
+ struct page *pages[16];
+ unsigned long nrpages;
+ int ret;
+ int i;
+
+ /* the caller is responsible for locking the start index */
+ if (index == locked_page->index && index == end_index)
+ return 0;
+
+ /* skip the page at the start index */
+ nrpages = end_index - index + 1;
+ while (nrpages > 0) {
+ ret = find_get_pages_contig(inode->i_mapping, index,
+ min_t(unsigned long,
+ nrpages, ARRAY_SIZE(pages)), pages);
+ if (ret == 0) {
+ ret = -EAGAIN;
+ goto done;
+ }
+ /* now we have an array of pages, lock them all */
+ for (i = 0; i < ret; i++) {
+ /*
+ * the caller is taking responsibility for
+ * locked_page
+ */
+ if (pages[i] != locked_page) {
+ lock_page(pages[i]);
+ if (!PageDirty(pages[i]) ||
+ pages[i]->mapping != inode->i_mapping) {
+ ret = -EAGAIN;
+ unlock_page(pages[i]);
+ page_cache_release(pages[i]);
+ goto done;
+ }
}
+ page_cache_release(pages[i]);
+ pages_locked++;
}
- node = rb_next(node);
- if (!node)
- break;
+ nrpages -= ret;
+ index += ret;
+ cond_resched();
}
-out:
- spin_unlock_irq(&tree->lock);
- return total_bytes;
+ ret = 0;
+done:
+ if (ret && pages_locked) {
+ __unlock_for_delalloc(inode, locked_page,
+ delalloc_start,
+ ((u64)(start_index + pages_locked - 1)) <<
+ PAGE_CACHE_SHIFT);
+ }
+ return ret;
}
+
/*
- * helper function to lock both pages and extents in the tree.
- * pages must be locked first.
+ * find a contiguous range of bytes in the file marked as delalloc, not
+ * more than 'max_bytes'. start and end are used to return the range.
+ *
+ * 1 is returned if we find something, 0 if nothing was in the tree
*/
-int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
+static noinline u64 find_lock_delalloc_range(struct inode *inode,
+ struct extent_io_tree *tree,
+ struct page *locked_page,
+ u64 *start, u64 *end,
+ u64 max_bytes)
{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- struct page *page;
- int err;
+ u64 delalloc_start;
+ u64 delalloc_end;
+ u64 found;
+ struct extent_state *cached_state = NULL;
+ int ret;
+ int loops = 0;
- while (index <= end_index) {
- page = grab_cache_page(tree->mapping, index);
- if (!page) {
- err = -ENOMEM;
- goto failed;
- }
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto failed;
- }
- index++;
+again:
+ /* step one, find a bunch of delalloc bytes starting at start */
+ delalloc_start = *start;
+ delalloc_end = 0;
+ found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
+ max_bytes);
+ if (!found || delalloc_end <= *start) {
+ *start = delalloc_start;
+ *end = delalloc_end;
+ return found;
}
- lock_extent(tree, start, end, GFP_NOFS);
- return 0;
-failed:
/*
- * we failed above in getting the page at 'index', so we undo here
- * up to but not including the page at 'index'
+ * start comes from the offset of locked_page. We have to lock
+ * pages in order, so we can't process delalloc bytes before
+ * locked_page
*/
- end_index = index;
- index = start >> PAGE_CACHE_SHIFT;
- while (index < end_index) {
- page = find_get_page(tree->mapping, index);
- unlock_page(page);
- page_cache_release(page);
- index++;
+ if (delalloc_start < *start)
+ delalloc_start = *start;
+
+ /*
+ * make sure to limit the number of pages we try to lock down
+ * if we're looping.
+ */
+ if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
+ delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+
+ /* step two, lock all the pages after the page that has start */
+ ret = lock_delalloc_pages(inode, locked_page,
+ delalloc_start, delalloc_end);
+ if (ret == -EAGAIN) {
+ /* some of the pages are gone, let's avoid looping by
+ * shortening the size of the delalloc range we're searching
+ */
+ free_extent_state(cached_state);
+ if (!loops) {
+ unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
+ max_bytes = PAGE_CACHE_SIZE - offset;
+ loops = 1;
+ goto again;
+ } else {
+ found = 0;
+ goto out_failed;
+ }
}
- return err;
+ BUG_ON(ret);
+
+ /* step three, lock the state bits for the whole range */
+ lock_extent_bits(tree, delalloc_start, delalloc_end,
+ 0, &cached_state, GFP_NOFS);
+
+ /* then test to make sure it is all still delalloc */
+ ret = test_range_bit(tree, delalloc_start, delalloc_end,
+ EXTENT_DELALLOC, 1, cached_state);
+ if (!ret) {
+ unlock_extent_cached(tree, delalloc_start, delalloc_end,
+ &cached_state, GFP_NOFS);
+ __unlock_for_delalloc(inode, locked_page,
+ delalloc_start, delalloc_end);
+ cond_resched();
+ goto again;
+ }
+ free_extent_state(cached_state);
+ *start = delalloc_start;
+ *end = delalloc_end;
+out_failed:
+ return found;
}
-EXPORT_SYMBOL(lock_range);
-/*
- * helper function to unlock both pages and extents in the tree.
- */
-int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
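+/*
+ * clear the requested bits on the io tree for [start, end], then walk
+ * the pages in the range applying the page-level operations asked for
+ * in 'op' (dirty, writeback, private2, unlock).  locked_page is skipped
+ * so the caller keeps control of it.
+ */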
+int extent_clear_unlock_delalloc(struct inode *inode,
+ struct extent_io_tree *tree,
+ u64 start, u64 end, struct page *locked_page,
+ unsigned long op)
{
+ int ret;
+ struct page *pages[16];
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- struct page *page;
+ unsigned long nr_pages = end_index - index + 1;
+ int i;
+ int clear_bits = 0;
- while (index <= end_index) {
- page = find_get_page(tree->mapping, index);
- unlock_page(page);
- page_cache_release(page);
- index++;
+ if (op & EXTENT_CLEAR_UNLOCK)
+ clear_bits |= EXTENT_LOCKED;
+ if (op & EXTENT_CLEAR_DIRTY)
+ clear_bits |= EXTENT_DIRTY;
+
+ if (op & EXTENT_CLEAR_DELALLOC)
+ clear_bits |= EXTENT_DELALLOC;
+
+ if (op & EXTENT_CLEAR_ACCOUNTING)
+ clear_bits |= EXTENT_DO_ACCOUNTING;
+
+ clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
+ if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
+ EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
+ EXTENT_SET_PRIVATE2)))
+ return 0;
+
+ while (nr_pages > 0) {
+ ret = find_get_pages_contig(inode->i_mapping, index,
+ min_t(unsigned long,
+ nr_pages, ARRAY_SIZE(pages)), pages);
+ for (i = 0; i < ret; i++) {
+
+ if (op & EXTENT_SET_PRIVATE2)
+ SetPagePrivate2(pages[i]);
+
+ if (pages[i] == locked_page) {
+ page_cache_release(pages[i]);
+ continue;
+ }
+ if (op & EXTENT_CLEAR_DIRTY)
+ clear_page_dirty_for_io(pages[i]);
+ if (op & EXTENT_SET_WRITEBACK)
+ set_page_writeback(pages[i]);
+ if (op & EXTENT_END_WRITEBACK)
+ end_page_writeback(pages[i]);
+ if (op & EXTENT_CLEAR_UNLOCK_PAGE)
+ unlock_page(pages[i]);
+ page_cache_release(pages[i]);
+ }
+ nr_pages -= ret;
+ index += ret;
+ cond_resched();
}
- unlock_extent(tree, start, end, GFP_NOFS);
return 0;
}
-EXPORT_SYMBOL(unlock_range);
+/*
+ * count the number of bytes in the tree that have a given bit(s)
+ * set. This can be fairly slow, except for EXTENT_DIRTY which is
+ * cached. The total number found is returned.
+ */
+u64 count_range_bits(struct extent_io_tree *tree,
+ u64 *start, u64 search_end, u64 max_bytes,
+ unsigned long bits)
+{
+ struct rb_node *node;
+ struct extent_state *state;
+ u64 cur_start = *start;
+ u64 total_bytes = 0;
+ int found = 0;
+
+ if (search_end <= cur_start) {
+ WARN_ON(1);
+ return 0;
+ }
+
+ spin_lock(&tree->lock);
+ if (cur_start == 0 && bits == EXTENT_DIRTY) {
+ total_bytes = tree->dirty_bytes;
+ goto out;
+ }
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = tree_search(tree, cur_start);
+ if (!node)
+ goto out;
+
+ while (1) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->start > search_end)
+ break;
+ if (state->end >= cur_start && (state->state & bits)) {
+ total_bytes += min(search_end, state->end) + 1 -
+ max(cur_start, state->start);
+ if (total_bytes >= max_bytes)
+ break;
+ if (!found) {
+ *start = state->start;
+ found = 1;
+ }
+ }
+ node = rb_next(node);
+ if (!node)
+ break;
+ }
+out:
+ spin_unlock(&tree->lock);
+ return total_bytes;
+}
+
+/*
+ * set the private field for a given byte offset in the tree. If there isn't
+ * an extent_state there already, this does nothing.
+ */
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
struct rb_node *node;
struct extent_state *state;
int ret = 0;
- spin_lock_irq(&tree->lock);
+ spin_lock(&tree->lock);
/*
* this search will find all the extents that end after
* our range starts.
*/
node = tree_search(tree, start);
- if (!node || IS_ERR(node)) {
+ if (!node) {
ret = -ENOENT;
goto out;
}
}
state->private = private;
out:
- spin_unlock_irq(&tree->lock);
+ spin_unlock(&tree->lock);
return ret;
}
struct extent_state *state;
int ret = 0;
- spin_lock_irq(&tree->lock);
+ spin_lock(&tree->lock);
/*
* this search will find all the extents that end after
* our range starts.
*/
node = tree_search(tree, start);
- if (!node || IS_ERR(node)) {
+ if (!node) {
ret = -ENOENT;
goto out;
}
}
*private = state->private;
out:
- spin_unlock_irq(&tree->lock);
+ spin_unlock(&tree->lock);
return ret;
}
* range is found set.
*/
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int filled)
+ int bits, int filled, struct extent_state *cached)
{
struct extent_state *state = NULL;
struct rb_node *node;
int bitset = 0;
- unsigned long flags;
- spin_lock_irqsave(&tree->lock, flags);
- node = tree_search(tree, start);
+ spin_lock(&tree->lock);
+ if (cached && cached->tree && cached->start == start)
+ node = &cached->rb_node;
+ else
+ node = tree_search(tree, start);
while (node && start <= end) {
state = rb_entry(node, struct extent_state, rb_node);
bitset = 0;
break;
}
+
+ if (state->end == (u64)-1)
+ break;
+
start = state->end + 1;
if (start > end)
break;
break;
}
}
- spin_unlock_irqrestore(&tree->lock, flags);
+ spin_unlock(&tree->lock);
return bitset;
}
-EXPORT_SYMBOL(test_range_bit);
/*
* helper function to set a given page up to date if all the
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
- if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
+ if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
SetPageUptodate(page);
return 0;
}
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
- if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
+ if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
unlock_page(page);
return 0;
}
static int check_page_writeback(struct extent_io_tree *tree,
struct page *page)
{
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 end = start + PAGE_CACHE_SIZE - 1;
- if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
- end_page_writeback(page);
+ end_page_writeback(page);
return 0;
}
* Scheduling is not allowed, so the extent state tree is expected
* to have one and only one object corresponding to this IO.
*/
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_extent_writepage(struct bio *bio, int err)
-#else
-static int end_bio_extent_writepage(struct bio *bio,
- unsigned int bytes_done, int err)
-#endif
{
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ int uptodate = err == 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct extent_state *state = bio->bi_private;
- struct extent_io_tree *tree = state->tree;
- struct rb_node *node;
+ struct extent_io_tree *tree;
u64 start;
u64 end;
- u64 cur;
int whole_page;
- unsigned long flags;
+ int ret;
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- if (bio->bi_size)
- return 1;
-#endif
do {
struct page *page = bvec->bv_page;
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
+
start = ((u64)page->index << PAGE_CACHE_SHIFT) +
bvec->bv_offset;
end = start + bvec->bv_len - 1;
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
-
- if (!uptodate) {
- clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
- ClearPageUptodate(page);
- SetPageError(page);
- }
-
if (tree->ops && tree->ops->writepage_end_io_hook) {
- tree->ops->writepage_end_io_hook(page, start, end,
- state);
+ ret = tree->ops->writepage_end_io_hook(page, start,
+ end, NULL, uptodate);
+ if (ret)
+ uptodate = 0;
}
- /*
- * bios can get merged in funny ways, and so we need to
- * be careful with the state variable. We know the
- * state won't be merged with others because it has
- * WRITEBACK set, but we can't be sure each biovec is
- * sequential in the file. So, if our cached state
- * doesn't match the expected end, search the tree
- * for the correct one.
- */
-
- spin_lock_irqsave(&tree->lock, flags);
- if (!state || state->end != end) {
- state = NULL;
- node = __etree_search(tree, start, NULL, NULL);
- if (node) {
- state = rb_entry(node, struct extent_state,
- rb_node);
- if (state->end != end ||
- !(state->state & EXTENT_WRITEBACK))
- state = NULL;
- }
- if (!state) {
- spin_unlock_irqrestore(&tree->lock, flags);
- clear_extent_writeback(tree, start,
- end, GFP_ATOMIC);
- goto next_io;
+ if (!uptodate && tree->ops &&
+ tree->ops->writepage_io_failed_hook) {
+ ret = tree->ops->writepage_io_failed_hook(bio, page,
+ start, end, NULL);
+ if (ret == 0) {
+ uptodate = (err == 0);
+ continue;
}
}
- cur = end;
- while(1) {
- struct extent_state *clear = state;
- cur = state->start;
- node = rb_prev(&state->rb_node);
- if (node) {
- state = rb_entry(node,
- struct extent_state,
- rb_node);
- } else {
- state = NULL;
- }
- clear_state_bit(tree, clear, EXTENT_WRITEBACK,
- 1, 0);
- if (cur == start)
- break;
- if (cur < start) {
- WARN_ON(1);
- break;
- }
- if (!node)
- break;
- }
- /* before releasing the lock, make sure the next state
- * variable has the expected bits set and corresponds
- * to the correct offsets in the file
- */
- if (state && (state->end + 1 != start ||
- !(state->state & EXTENT_WRITEBACK))) {
- state = NULL;
+ if (!uptodate) {
+ clear_extent_uptodate(tree, start, end, GFP_NOFS);
+ ClearPageUptodate(page);
+ SetPageError(page);
}
- spin_unlock_irqrestore(&tree->lock, flags);
-next_io:
if (whole_page)
end_page_writeback(page);
else
check_page_writeback(tree, page);
} while (bvec >= bio->bi_io_vec);
+
bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- return 0;
-#endif
}
/*
* Scheduling is not allowed, so the extent state tree is expected
* to have one and only one object corresponding to this IO.
*/
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_extent_readpage(struct bio *bio, int err)
-#else
-static int end_bio_extent_readpage(struct bio *bio,
- unsigned int bytes_done, int err)
-#endif
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct extent_state *state = bio->bi_private;
- struct extent_io_tree *tree = state->tree;
- struct rb_node *node;
+ struct extent_io_tree *tree;
u64 start;
u64 end;
- u64 cur;
- unsigned long flags;
int whole_page;
int ret;
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- if (bio->bi_size)
- return 1;
-#endif
+ if (err)
+ uptodate = 0;
do {
struct page *page = bvec->bv_page;
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
+
start = ((u64)page->index << PAGE_CACHE_SHIFT) +
bvec->bv_offset;
end = start + bvec->bv_len - 1;
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
- state);
+ NULL);
if (ret)
uptodate = 0;
}
-
- spin_lock_irqsave(&tree->lock, flags);
- if (!state || state->end != end) {
- state = NULL;
- node = __etree_search(tree, start, NULL, NULL);
- if (node) {
- state = rb_entry(node, struct extent_state,
- rb_node);
- if (state->end != end ||
- !(state->state & EXTENT_LOCKED))
- state = NULL;
- }
- if (!state) {
- spin_unlock_irqrestore(&tree->lock, flags);
- set_extent_uptodate(tree, start, end,
- GFP_ATOMIC);
- unlock_extent(tree, start, end, GFP_ATOMIC);
- goto next_io;
+ if (!uptodate && tree->ops &&
+ tree->ops->readpage_io_failed_hook) {
+ ret = tree->ops->readpage_io_failed_hook(bio, page,
+ start, end, NULL);
+ if (ret == 0) {
+ uptodate =
+ test_bit(BIO_UPTODATE, &bio->bi_flags);
+ if (err)
+ uptodate = 0;
+ continue;
}
}
- cur = end;
- while(1) {
- struct extent_state *clear = state;
- cur = state->start;
- node = rb_prev(&state->rb_node);
- if (node) {
- state = rb_entry(node,
- struct extent_state,
- rb_node);
- } else {
- state = NULL;
- }
- set_state_cb(tree, clear, EXTENT_UPTODATE);
- clear->state |= EXTENT_UPTODATE;
- clear_state_bit(tree, clear, EXTENT_LOCKED,
- 1, 0);
- if (cur == start)
- break;
- if (cur < start) {
- WARN_ON(1);
- break;
- }
- if (!node)
- break;
- }
- /* before releasing the lock, make sure the next state
- * variable has the expected bits set and corresponds
- * to the correct offsets in the file
- */
- if (state && (state->end + 1 != start ||
- !(state->state & EXTENT_LOCKED))) {
- state = NULL;
+ if (uptodate) {
+ set_extent_uptodate(tree, start, end,
+ GFP_ATOMIC);
}
- spin_unlock_irqrestore(&tree->lock, flags);
-next_io:
+ unlock_extent(tree, start, end, GFP_ATOMIC);
+
if (whole_page) {
if (uptodate) {
SetPageUptodate(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- return 0;
-#endif
}
/*
* the structs in the extent tree when done, and set the uptodate bits
* as appropriate.
*/
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_extent_preparewrite(struct bio *bio, int err)
-#else
-static int end_bio_extent_preparewrite(struct bio *bio,
- unsigned int bytes_done, int err)
-#endif
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct extent_state *state = bio->bi_private;
- struct extent_io_tree *tree = state->tree;
+ struct extent_io_tree *tree;
u64 start;
u64 end;
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- if (bio->bi_size)
- return 1;
-#endif
-
do {
struct page *page = bvec->bv_page;
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
+
start = ((u64)page->index << PAGE_CACHE_SHIFT) +
bvec->bv_offset;
end = start + bvec->bv_len - 1;
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
- return 0;
-#endif
}
static struct bio *
}
if (bio) {
+ bio->bi_size = 0;
bio->bi_bdev = bdev;
bio->bi_sector = first_sector;
}
return bio;
}
-static int submit_one_bio(int rw, struct bio *bio)
+static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
+ unsigned long bio_flags)
{
- u64 maxsector;
int ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
- struct rb_node *node;
- struct extent_state *state;
u64 start;
u64 end;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
end = start + bvec->bv_len - 1;
- spin_lock_irq(&tree->lock);
- node = __etree_search(tree, start, NULL, NULL);
- BUG_ON(!node);
- state = rb_entry(node, struct extent_state, rb_node);
- while(state->end < end) {
- node = rb_next(node);
- state = rb_entry(node, struct extent_state, rb_node);
- }
- BUG_ON(state->end != end);
- spin_unlock_irq(&tree->lock);
-
- bio->bi_private = state;
+ bio->bi_private = NULL;
bio_get(bio);
- maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
- if (maxsector < bio->bi_sector) {
- printk("sector too large max %Lu got %llu\n", maxsector,
- (unsigned long long)bio->bi_sector);
- WARN_ON(1);
- }
-
- submit_bio(rw, bio);
+ if (tree->ops && tree->ops->submit_bio_hook)
+ tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+ mirror_num, bio_flags);
+ else
+ submit_bio(rw, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
bio_put(bio);
struct block_device *bdev,
struct bio **bio_ret,
unsigned long max_pages,
- bio_end_io_t end_io_func)
+ bio_end_io_t end_io_func,
+ int mirror_num,
+ unsigned long prev_bio_flags,
+ unsigned long bio_flags)
{
int ret = 0;
struct bio *bio;
int nr;
+ int contig = 0;
+ int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
+ int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
+ size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
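+ /*
+ * try to add this page to the bio the caller is building.  If the page
+ * is not contiguous on disk, the compression flags differ, the fs
+ * merge_bio_hook rejects it, or bio_add_page can't fit the whole page,
+ * submit the old bio and start a new one.
+ */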
if (bio_ret && *bio_ret) {
bio = *bio_ret;
- if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
- bio_add_page(bio, page, size, offset) < size) {
- ret = submit_one_bio(rw, bio);
+ if (old_compressed)
+ contig = bio->bi_sector == sector;
+ else
+ contig = bio->bi_sector + (bio->bi_size >> 9) ==
+ sector;
+
+ if (prev_bio_flags != bio_flags || !contig ||
+ (tree->ops && tree->ops->merge_bio_hook &&
+ tree->ops->merge_bio_hook(page, offset, page_size, bio,
+ bio_flags)) ||
+ bio_add_page(bio, page, page_size, offset) < page_size) {
+ ret = submit_one_bio(rw, bio, mirror_num,
+ prev_bio_flags);
bio = NULL;
} else {
return 0;
}
}
- nr = bio_get_nr_vecs(bdev);
- bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
- if (!bio) {
- printk("failed to allocate bio nr %d\n", nr);
- }
+ if (this_compressed)
+ nr = BIO_MAX_PAGES;
+ else
+ nr = bio_get_nr_vecs(bdev);
+ bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
- bio_add_page(bio, page, size, offset);
+ bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
- if (bio_ret) {
+ if (bio_ret)
*bio_ret = bio;
- } else {
- ret = submit_one_bio(rw, bio);
- }
+ else
+ ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
return ret;
}
{
if (!PagePrivate(page)) {
SetPagePrivate(page);
- WARN_ON(!page->mapping->a_ops->invalidatepage);
- set_page_private(page, EXTENT_PAGE_PRIVATE);
page_cache_get(page);
+ set_page_private(page, EXTENT_PAGE_PRIVATE);
}
}
-void set_page_extent_head(struct page *page, unsigned long len)
+static void set_page_extent_head(struct page *page, unsigned long len)
{
set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
}
static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
- struct bio **bio)
+ struct bio **bio, int mirror_num,
+ unsigned long *bio_flags)
{
struct inode *inode = page->mapping->host;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
int nr = 0;
size_t page_offset = 0;
size_t iosize;
+ size_t disk_io_size;
size_t blocksize = inode->i_sb->s_blocksize;
+ unsigned long this_bio_flag = 0;
set_page_extent_mapped(page);
end = page_end;
lock_extent(tree, start, end, GFP_NOFS);
+ if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
+ char *userpage;
+ size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
+
+ if (zero_offset) {
+ iosize = PAGE_CACHE_SIZE - zero_offset;
+ userpage = kmap_atomic(page, KM_USER0);
+ memset(userpage + zero_offset, 0, iosize);
+ flush_dcache_page(page);
+ kunmap_atomic(userpage, KM_USER0);
+ }
+ }
while (cur <= end) {
if (cur >= last_byte) {
char *userpage;
unlock_extent(tree, cur, end, GFP_NOFS);
break;
}
-
extent_offset = cur - em->start;
BUG_ON(extent_map_end(em) <= cur);
BUG_ON(end < cur);
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+ this_bio_flag = EXTENT_BIO_COMPRESSED;
+
iosize = min(extent_map_end(em) - cur, end - cur + 1);
cur_end = min(extent_map_end(em) - 1, end);
iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
- sector = (em->block_start + extent_offset) >> 9;
+ if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
+ disk_io_size = em->block_len;
+ sector = em->block_start >> 9;
+ } else {
+ sector = (em->block_start + extent_offset) >> 9;
+ disk_io_size = iosize;
+ }
bdev = em->bdev;
block_start = em->block_start;
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ block_start = EXTENT_MAP_HOLE;
free_extent_map(em);
em = NULL;
continue;
}
/* the get_extent function already copied into the page */
- if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+ if (test_range_bit(tree, cur, cur_end,
+ EXTENT_UPTODATE, 1, NULL)) {
+ check_page_uptodate(tree, page);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
cur = cur + iosize;
page_offset += iosize;
cur + iosize - 1);
}
if (!ret) {
- unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
- nr -= page->index;
+ unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+ pnr -= page->index;
ret = submit_extent_page(READ, tree, page,
- sector, iosize, page_offset,
- bdev, bio, nr,
- end_bio_extent_readpage);
+ sector, disk_io_size, page_offset,
+ bdev, bio, pnr,
+ end_bio_extent_readpage, mirror_num,
+ *bio_flags,
+ this_bio_flag);
+ nr++;
+ *bio_flags = this_bio_flag;
}
if (ret)
SetPageError(page);
cur = cur + iosize;
page_offset += iosize;
- nr++;
}
if (!nr) {
if (!PageError(page))
get_extent_t *get_extent)
{
struct bio *bio = NULL;
+ unsigned long bio_flags = 0;
int ret;
- ret = __extent_read_full_page(tree, page, get_extent, &bio);
+ ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
+ &bio_flags);
if (bio)
- submit_one_bio(READ, bio);
+ submit_one_bio(READ, bio, 0, bio_flags);
return ret;
}
-EXPORT_SYMBOL(extent_read_full_page);
+
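+/*
+ * account pages submitted during writepage against wbc->nr_to_write and,
+ * for cyclic or whole-file writeback, move the mapping's writeback_index
+ * past the pages we just handled
+ */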
+static noinline void update_nr_written(struct page *page,
+ struct writeback_control *wbc,
+ unsigned long nr_written)
+{
+ wbc->nr_to_write -= nr_written;
+ if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+ wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+ page->mapping->writeback_index = page->index + nr_written;
+}
/*
* the writepage semantics are similar to regular writepage. extent
u64 last_byte = i_size_read(inode);
u64 block_start;
u64 iosize;
+ u64 unlock_start;
sector_t sector;
+ struct extent_state *cached_state = NULL;
struct extent_map *em;
struct block_device *bdev;
int ret;
int nr = 0;
- size_t page_offset = 0;
+ size_t pg_offset = 0;
size_t blocksize;
loff_t i_size = i_size_read(inode);
unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
u64 nr_delalloc;
u64 delalloc_end;
+ int page_started;
+ int compressed;
+ int write_flags;
+ unsigned long nr_written = 0;
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ write_flags = WRITE_SYNC_PLUG;
+ else
+ write_flags = WRITE;
WARN_ON(!PageLocked(page));
- if (page->index > end_index) {
- clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+ pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+ if (page->index > end_index ||
+ (page->index == end_index && !pg_offset)) {
+ page->mapping->a_ops->invalidatepage(page, 0);
unlock_page(page);
return 0;
}
if (page->index == end_index) {
char *userpage;
- size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
-
userpage = kmap_atomic(page, KM_USER0);
- memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
- flush_dcache_page(page);
+ memset(userpage + pg_offset, 0,
+ PAGE_CACHE_SIZE - pg_offset);
kunmap_atomic(userpage, KM_USER0);
+ flush_dcache_page(page);
}
+ pg_offset = 0;
set_page_extent_mapped(page);
delalloc_start = start;
delalloc_end = 0;
- while(delalloc_end < page_end) {
- nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
+ page_started = 0;
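+ /*
+ * unless the caller has already locked this range, find the
+ * delalloc ranges covering the page and pass them to the
+ * fill_delalloc hook, which may start IO on them
+ */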
+ if (!epd->extent_locked) {
+ u64 delalloc_to_write = 0;
+ /*
+ * make sure the wbc mapping index is at least updated
+ * to this page.
+ */
+ update_nr_written(page, wbc, 0);
+
+ while (delalloc_end < page_end) {
+ nr_delalloc = find_lock_delalloc_range(inode, tree,
+ page,
+ &delalloc_start,
&delalloc_end,
128 * 1024 * 1024);
- if (nr_delalloc == 0) {
+ if (nr_delalloc == 0) {
+ delalloc_start = delalloc_end + 1;
+ continue;
+ }
+ tree->ops->fill_delalloc(inode, page, delalloc_start,
+ delalloc_end, &page_started,
+ &nr_written);
+ /*
+ * delalloc_end is already one less than the total
+ * length, so we don't subtract one from
+ * PAGE_CACHE_SIZE
+ */
+ delalloc_to_write += (delalloc_end - delalloc_start +
+ PAGE_CACHE_SIZE) >>
+ PAGE_CACHE_SHIFT;
delalloc_start = delalloc_end + 1;
- continue;
}
- tree->ops->fill_delalloc(inode, delalloc_start,
- delalloc_end);
- clear_extent_bit(tree, delalloc_start,
- delalloc_end,
- EXTENT_LOCKED | EXTENT_DELALLOC,
- 1, 0, GFP_NOFS);
- delalloc_start = delalloc_end + 1;
- }
- lock_extent(tree, start, page_end, GFP_NOFS);
+ if (wbc->nr_to_write < delalloc_to_write) {
+ int thresh = 8192;
- end = page_end;
- if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
- printk("found delalloc bits after lock_extent\n");
+ if (delalloc_to_write < thresh * 2)
+ thresh = delalloc_to_write;
+ wbc->nr_to_write = min_t(u64, delalloc_to_write,
+ thresh);
+ }
+
+ /* did the fill delalloc function already unlock and start
+ * the IO?
+ */
+ if (page_started) {
+ ret = 0;
+ /*
+ * we've unlocked the page, so we can't update
+ * the mapping's writeback index, just update
+ * nr_to_write.
+ */
+ wbc->nr_to_write -= nr_written;
+ goto done_unlocked;
+ }
+ }
+ if (tree->ops && tree->ops->writepage_start_hook) {
+ ret = tree->ops->writepage_start_hook(page, start,
+ page_end);
+ if (ret == -EAGAIN) {
+ redirty_page_for_writepage(wbc, page);
+ update_nr_written(page, wbc, nr_written);
+ unlock_page(page);
+ ret = 0;
+ goto done_unlocked;
+ }
}
+ /*
+ * we don't want to touch the inode after unlocking the page,
+ * so we update the mapping writeback index now
+ */
+ update_nr_written(page, wbc, nr_written + 1);
+
+ end = page_end;
if (last_byte <= start) {
- clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, start,
+ page_end, NULL, 1);
+ unlock_start = page_end + 1;
goto done;
}
- set_extent_uptodate(tree, start, page_end, GFP_NOFS);
blocksize = inode->i_sb->s_blocksize;
while (cur <= end) {
if (cur >= last_byte) {
- clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, cur,
+ page_end, NULL, 1);
+ unlock_start = page_end + 1;
break;
}
- em = epd->get_extent(inode, page, page_offset, cur,
+ em = epd->get_extent(inode, page, pg_offset, cur,
end - cur + 1, 1);
if (IS_ERR(em) || !em) {
SetPageError(page);
sector = (em->block_start + extent_offset) >> 9;
bdev = em->bdev;
block_start = em->block_start;
+ compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
free_extent_map(em);
em = NULL;
- if (block_start == EXTENT_MAP_HOLE ||
+ /*
+ * compressed and inline extents are written through other
+ * paths in the FS
+ */
+ if (compressed || block_start == EXTENT_MAP_HOLE ||
block_start == EXTENT_MAP_INLINE) {
- clear_extent_dirty(tree, cur,
- cur + iosize - 1, GFP_NOFS);
- cur = cur + iosize;
- page_offset += iosize;
+ /*
+ * end_io notification does not happen here for
+ * compressed extents
+ */
+ if (!compressed && tree->ops &&
+ tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, cur,
+ cur + iosize - 1,
+ NULL, 1);
+ else if (compressed) {
+ /* we don't want to end_page_writeback on
+ * a compressed extent. this happens
+ * elsewhere
+ */
+ nr++;
+ }
+
+ cur += iosize;
+ pg_offset += iosize;
+ unlock_start = cur;
continue;
}
-
/* leave this out until we have a page_mkwrite call */
if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
- EXTENT_DIRTY, 0)) {
+ EXTENT_DIRTY, 0, NULL)) {
cur = cur + iosize;
- page_offset += iosize;
+ pg_offset += iosize;
continue;
}
- clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
+
if (tree->ops && tree->ops->writepage_io_hook) {
ret = tree->ops->writepage_io_hook(page, cur,
cur + iosize - 1);
} else {
ret = 0;
}
- if (ret)
+ if (ret) {
SetPageError(page);
- else {
+ } else {
unsigned long max_nr = end_index + 1;
+
set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
- printk("warning page %lu not writeback, "
- "cur %llu end %llu\n", page->index,
- (unsigned long long)cur,
+ printk(KERN_ERR "btrfs warning page %lu not "
+ "writeback, cur %llu end %llu\n",
+ page->index, (unsigned long long)cur,
(unsigned long long)end);
}
- ret = submit_extent_page(WRITE, tree, page, sector,
- iosize, page_offset, bdev,
- &epd->bio, max_nr,
- end_bio_extent_writepage);
+ ret = submit_extent_page(write_flags, tree, page,
+ sector, iosize, pg_offset,
+ bdev, &epd->bio, max_nr,
+ end_bio_extent_writepage,
+ 0, 0, 0);
if (ret)
SetPageError(page);
}
cur = cur + iosize;
- page_offset += iosize;
+ pg_offset += iosize;
nr++;
}
done:
set_page_writeback(page);
end_page_writeback(page);
}
- unlock_extent(tree, start, page_end, GFP_NOFS);
unlock_page(page);
- return 0;
-}
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+done_unlocked:
-/* Taken directly from 2.6.23 for 2.6.18 back port */
-typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
- void *data);
+ /* drop our reference on any cached states */
+ free_extent_state(cached_state);
+ return 0;
+}
/**
- * write_cache_pages - walk the list of dirty pages of the given address space
- * and write all of them.
+ * extent_write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
* @mapping: address space structure to write
* @wbc: subtract the number of written pages from *@wbc->nr_to_write
* @writepage: function called for each page
* WB_SYNC_ALL then we were called for data integrity and we must wait for
* existing IO to complete.
*/
-static int write_cache_pages(struct address_space *mapping,
- struct writeback_control *wbc, writepage_t writepage,
- void *data)
+static int extent_write_cache_pages(struct extent_io_tree *tree,
+ struct address_space *mapping,
+ struct writeback_control *wbc,
+ writepage_t writepage, void *data,
+ void (*flush_fn)(void *))
{
- struct backing_dev_info *bdi = mapping->backing_dev_info;
int ret = 0;
int done = 0;
+ int nr_to_write_done = 0;
struct pagevec pvec;
int nr_pages;
pgoff_t index;
int scanned = 0;
int range_whole = 0;
- if (wbc->nonblocking && bdi_write_congested(bdi)) {
- wbc->encountered_congestion = 1;
- return 0;
- }
-
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
scanned = 1;
}
retry:
- while (!done && (index <= end) &&
+ while (!done && !nr_to_write_done && (index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ PAGECACHE_TAG_DIRTY, min(end - index,
+ (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
unsigned i;
scanned = 1;
* swizzled back from swapper_space to tmpfs file
* mapping
*/
- lock_page(page);
+ if (tree->ops && tree->ops->write_cache_pages_lock_hook)
+ tree->ops->write_cache_pages_lock_hook(page);
+ else
+ lock_page(page);
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
continue;
}
- if (wbc->sync_mode != WB_SYNC_NONE)
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ if (PageWriteback(page))
+ flush_fn(data);
wait_on_page_writeback(page);
+ }
if (PageWriteback(page) ||
!clear_page_dirty_for_io(page)) {
unlock_page(page);
ret = 0;
}
- if (ret || (--(wbc->nr_to_write) <= 0))
- done = 1;
- if (wbc->nonblocking && bdi_write_congested(bdi)) {
- wbc->encountered_congestion = 1;
+ if (ret)
done = 1;
- }
+
+ /*
+ * the filesystem may choose to bump up nr_to_write.
+ * We have to make sure to honor the new nr_to_write
+ * at any time
+ */
+ nr_to_write_done = wbc->nr_to_write <= 0;
}
pagevec_release(&pvec);
cond_resched();
index = 0;
goto retry;
}
- if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
- mapping->writeback_index = index;
return ret;
}
-#endif
+
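+/*
+ * submit any bio built up in the extent_page_data, using WRITE_SYNC
+ * when the caller asked for synchronous IO
+ */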
+static void flush_epd_write_bio(struct extent_page_data *epd)
+{
+ if (epd->bio) {
+ if (epd->sync_io)
+ submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
+ else
+ submit_one_bio(WRITE, epd->bio, 0, 0);
+ epd->bio = NULL;
+ }
+}
+
+static noinline void flush_write_bio(void *data)
+{
+ struct extent_page_data *epd = data;
+ flush_epd_write_bio(epd);
+}
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent,
.bio = NULL,
.tree = tree,
.get_extent = get_extent,
+ .extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
.bdi = wbc->bdi,
- .sync_mode = WB_SYNC_NONE,
+ .sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
.range_start = page_offset(page) + PAGE_CACHE_SIZE,
.range_end = (loff_t)-1,
};
-
ret = __extent_writepage(page, wbc, &epd);
- write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
- if (epd.bio) {
- submit_one_bio(WRITE, epd.bio);
+ extent_write_cache_pages(tree, mapping, &wbc_writepages,
+ __extent_writepage, &epd, flush_write_bio);
+ flush_epd_write_bio(&epd);
+ return ret;
+}
+
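+/*
+ * write back the pages in [start, end] with extent_locked set in the
+ * epd: dirty pages go through __extent_writepage, clean ones just get
+ * the writepage_end_io_hook called and are unlocked
+ */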
+int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
+ u64 start, u64 end, get_extent_t *get_extent,
+ int mode)
+{
+ int ret = 0;
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+ unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
+ PAGE_CACHE_SHIFT;
+
+ struct extent_page_data epd = {
+ .bio = NULL,
+ .tree = tree,
+ .get_extent = get_extent,
+ .extent_locked = 1,
+ .sync_io = mode == WB_SYNC_ALL,
+ };
+ struct writeback_control wbc_writepages = {
+ .bdi = inode->i_mapping->backing_dev_info,
+ .sync_mode = mode,
+ .older_than_this = NULL,
+ .nr_to_write = nr_pages * 2,
+ .range_start = start,
+ .range_end = end + 1,
+ };
+
+ while (start <= end) {
+ page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+ if (clear_page_dirty_for_io(page))
+ ret = __extent_writepage(page, &wbc_writepages, &epd);
+ else {
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, start,
+ start + PAGE_CACHE_SIZE - 1,
+ NULL, 1);
+ unlock_page(page);
+ }
+ page_cache_release(page);
+ start += PAGE_CACHE_SIZE;
}
+
+ flush_epd_write_bio(&epd);
return ret;
}
-EXPORT_SYMBOL(extent_write_full_page);
-
int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping,
.bio = NULL,
.tree = tree,
.get_extent = get_extent,
+ .extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
- ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
- if (epd.bio) {
- submit_one_bio(WRITE, epd.bio);
- }
+ ret = extent_write_cache_pages(tree, mapping, wbc,
+ __extent_writepage, &epd,
+ flush_write_bio);
+ flush_epd_write_bio(&epd);
return ret;
}
-EXPORT_SYMBOL(extent_writepages);
int extent_readpages(struct extent_io_tree *tree,
struct address_space *mapping,
struct bio *bio = NULL;
unsigned page_idx;
struct pagevec pvec;
+ unsigned long bio_flags = 0;
pagevec_init(&pvec, 0);
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
/* open coding of lru_cache_add, also not exported */
page_cache_get(page);
if (!pagevec_add(&pvec, page))
- __pagevec_lru_add(&pvec);
- __extent_read_full_page(tree, page, get_extent, &bio);
+ __pagevec_lru_add_file(&pvec);
+ __extent_read_full_page(tree, page, get_extent,
+ &bio, 0, &bio_flags);
}
page_cache_release(page);
}
if (pagevec_count(&pvec))
- __pagevec_lru_add(&pvec);
+ __pagevec_lru_add_file(&pvec);
BUG_ON(!list_empty(pages));
if (bio)
- submit_one_bio(READ, bio);
+ submit_one_bio(READ, bio, 0, bio_flags);
return 0;
}
-EXPORT_SYMBOL(extent_readpages);
/*
* basic invalidatepage code, this waits on any locked or writeback
u64 end = start + PAGE_CACHE_SIZE - 1;
size_t blocksize = page->mapping->host->i_sb->s_blocksize;
- start += (offset + blocksize -1) & ~(blocksize - 1);
+ start += (offset + blocksize - 1) & ~(blocksize - 1);
if (start > end)
return 0;
lock_extent(tree, start, end, GFP_NOFS);
- wait_on_extent_writeback(tree, start, end);
+ wait_on_page_writeback(page);
clear_extent_bit(tree, start, end,
- EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
- 1, 1, GFP_NOFS);
+ EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING,
+ 1, 1, NULL, GFP_NOFS);
return 0;
}
-EXPORT_SYMBOL(extent_invalidatepage);
/*
* simple commit_write call, set_range_dirty is used to mark both
}
return 0;
}
-EXPORT_SYMBOL(extent_commit_write);
int extent_prepare_write(struct extent_io_tree *tree,
struct inode *inode, struct page *page,
orig_block_start = block_start;
lock_extent(tree, page_start, page_end, GFP_NOFS);
- while(block_start <= block_end) {
+ while (block_start <= block_end) {
em = get_extent(inode, page, page_offset, block_start,
block_end - block_start + 1, 1);
- if (IS_ERR(em) || !em) {
+ if (IS_ERR(em) || !em)
goto err;
- }
+
cur_end = min(block_end, extent_map_end(em) - 1);
block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
block_off_end = block_off_start + blocksize;
!isnew && !PageUptodate(page) &&
(block_off_end > to || block_off_start < from) &&
!test_range_bit(tree, block_start, cur_end,
- EXTENT_UPTODATE, 1)) {
+ EXTENT_UPTODATE, 1, NULL)) {
u64 sector;
u64 extent_offset = block_start - em->start;
size_t iosize;
*/
set_extent_bit(tree, block_start,
block_start + iosize - 1,
- EXTENT_LOCKED, 0, NULL, GFP_NOFS);
+ EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
ret = submit_extent_page(READ, tree, page,
sector, iosize, page_offset, em->bdev,
NULL, 1,
- end_bio_extent_preparewrite);
+ end_bio_extent_preparewrite, 0,
+ 0, 0);
iocount++;
block_start = block_start + iosize;
} else {
/* FIXME, zero out newly allocated blocks on error */
return err;
}
-EXPORT_SYMBOL(extent_prepare_write);
+
+/*
+ * a helper for releasepage, this tests for areas of the page that
+ * are locked or under IO and drops the related state bits if it is safe
+ * to drop the page.
+ */
+int try_release_extent_state(struct extent_map_tree *map,
+ struct extent_io_tree *tree, struct page *page,
+ gfp_t mask)
+{
+ u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 end = start + PAGE_CACHE_SIZE - 1;
+ int ret = 1;
+
+ if (test_range_bit(tree, start, end,
+ EXTENT_IOBITS, 0, NULL))
+ ret = 0;
+ else {
+ if ((mask & GFP_NOFS) == GFP_NOFS)
+ mask = GFP_NOFS;
+ /*
+ * at this point we can safely clear everything except the
+ * locked bit and the nodatasum bit
+ */
+ clear_extent_bit(tree, start, end,
+ ~(EXTENT_LOCKED | EXTENT_NODATASUM),
+ 0, 0, NULL, mask);
+ }
+ return ret;
+}
/*
* a helper for releasepage. As long as there are no locked extents
struct extent_map *em;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 end = start + PAGE_CACHE_SIZE - 1;
- u64 orig_start = start;
- int ret = 1;
if ((mask & __GFP_WAIT) &&
page->mapping->host->i_size > 16 * 1024 * 1024) {
+ u64 len;
while (start <= end) {
- spin_lock(&map->lock);
- em = lookup_extent_mapping(map, start, end);
+ len = end - start + 1;
+ write_lock(&map->lock);
+ em = lookup_extent_mapping(map, start, len);
if (!em || IS_ERR(em)) {
- spin_unlock(&map->lock);
+ write_unlock(&map->lock);
break;
}
- if (em->start != start) {
- spin_unlock(&map->lock);
+ if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
+ em->start != start) {
+ write_unlock(&map->lock);
free_extent_map(em);
break;
}
if (!test_range_bit(tree, em->start,
extent_map_end(em) - 1,
- EXTENT_LOCKED, 0)) {
+ EXTENT_LOCKED | EXTENT_WRITEBACK,
+ 0, NULL)) {
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em);
}
start = extent_map_end(em);
- spin_unlock(&map->lock);
+ write_unlock(&map->lock);
/* once for us */
free_extent_map(em);
}
}
- if (test_range_bit(tree, orig_start, end, EXTENT_IOBITS, 0))
- ret = 0;
- else {
- if ((mask & GFP_NOFS) == GFP_NOFS)
- mask = GFP_NOFS;
- clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
- 1, 1, mask);
- }
- return ret;
+ return try_release_extent_state(map, tree, page, mask);
}
-EXPORT_SYMBOL(try_release_extent_mapping);
sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
get_extent_t *get_extent)
struct inode *inode = mapping->host;
u64 start = iblock << inode->i_blkbits;
sector_t sector = 0;
+ size_t blksize = (1 << inode->i_blkbits);
struct extent_map *em;
- em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
+ lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
+ GFP_NOFS);
+ em = get_extent(inode, NULL, 0, start, blksize, 0);
+ unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
+ GFP_NOFS);
if (!em || IS_ERR(em))
return 0;
- if (em->block_start == EXTENT_MAP_INLINE ||
- em->block_start == EXTENT_MAP_HOLE)
+ if (em->block_start > EXTENT_MAP_LAST_BYTE)
goto out;
sector = (em->block_start + start - em->start) >> inode->i_blkbits;
return sector;
}
-static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb)
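+/*
+ * fiemap: walk the extent mappings covering [start, start + len) and
+ * report each one to user space via fiemap_fill_next_extent()
+ */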
+int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ __u64 start, __u64 len, get_extent_t *get_extent)
{
- if (list_empty(&eb->lru)) {
- extent_buffer_get(eb);
- list_add(&eb->lru, &tree->buffer_lru);
- tree->lru_size++;
- if (tree->lru_size >= BUFFER_LRU_MAX) {
- struct extent_buffer *rm;
- rm = list_entry(tree->buffer_lru.prev,
- struct extent_buffer, lru);
- tree->lru_size--;
- list_del_init(&rm->lru);
- free_extent_buffer(rm);
+ int ret;
+ u64 off = start;
+ u64 max = start + len;
+ u32 flags = 0;
+ u64 disko = 0;
+ struct extent_map *em = NULL;
+ int end = 0;
+ u64 em_start = 0, em_len = 0;
+ unsigned long emflags;
+ ret = 0;
+
+ if (len == 0)
+ return -EINVAL;
+
+ lock_extent(&BTRFS_I(inode)->io_tree, start, start + len,
+ GFP_NOFS);
+ em = get_extent(inode, NULL, 0, off, max - off, 0);
+ if (!em)
+ goto out;
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+ while (!end) {
+ off = em->start + em->len;
+ if (off >= max)
+ end = 1;
+
+ em_start = em->start;
+ em_len = em->len;
+
+ disko = 0;
+ flags = 0;
+
+ if (em->block_start == EXTENT_MAP_LAST_BYTE) {
+ end = 1;
+ flags |= FIEMAP_EXTENT_LAST;
+ } else if (em->block_start == EXTENT_MAP_HOLE) {
+ flags |= FIEMAP_EXTENT_UNWRITTEN;
+ } else if (em->block_start == EXTENT_MAP_INLINE) {
+ flags |= (FIEMAP_EXTENT_DATA_INLINE |
+ FIEMAP_EXTENT_NOT_ALIGNED);
+ } else if (em->block_start == EXTENT_MAP_DELALLOC) {
+ flags |= (FIEMAP_EXTENT_DELALLOC |
+ FIEMAP_EXTENT_UNKNOWN);
+ } else {
+ disko = em->block_start;
}
- } else
- list_move(&eb->lru, &tree->buffer_lru);
- return 0;
-}
-static struct extent_buffer *find_lru(struct extent_io_tree *tree,
- u64 start, unsigned long len)
-{
- struct list_head *lru = &tree->buffer_lru;
- struct list_head *cur = lru->next;
- struct extent_buffer *eb;
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+ flags |= FIEMAP_EXTENT_ENCODED;
- if (list_empty(lru))
- return NULL;
+ emflags = em->flags;
+ free_extent_map(em);
+ em = NULL;
- do {
- eb = list_entry(cur, struct extent_buffer, lru);
- if (eb->start == start && eb->len == len) {
- extent_buffer_get(eb);
- return eb;
+ if (!end) {
+ em = get_extent(inode, NULL, 0, off, max - off, 0);
+ if (!em)
+ goto out;
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+ emflags = em->flags;
+ }
+ if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+ flags |= FIEMAP_EXTENT_LAST;
+ end = 1;
}
- cur = cur->next;
- } while (cur != lru);
- return NULL;
-}
-static inline unsigned long num_extent_pages(u64 start, u64 len)
-{
- return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
- (start >> PAGE_CACHE_SHIFT);
+ ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+ em_len, flags);
+ if (ret)
+ goto out_free;
+ }
+out_free:
+ free_extent_map(em);
+out:
+ unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len,
+ GFP_NOFS);
+ return ret;
}
static inline struct page *extent_buffer_page(struct extent_buffer *eb,
return eb->first_page;
i += eb->start >> PAGE_CACHE_SHIFT;
mapping = eb->first_page->mapping;
- read_lock_irq(&mapping->tree_lock);
+ if (!mapping)
+ return NULL;
+
+ /*
+ * extent_buffer_page is only called after pinning the page
+ * by increasing the reference count. So we know the page must
+ * be in the radix tree.
+ */
+ rcu_read_lock();
p = radix_tree_lookup(&mapping->page_tree, i);
- read_unlock_irq(&mapping->tree_lock);
+ rcu_read_unlock();
+
return p;
}
+static inline unsigned long num_extent_pages(u64 start, u64 len)
+{
+ return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+ (start >> PAGE_CACHE_SHIFT);
+}
+
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
u64 start,
unsigned long len,
gfp_t mask)
{
struct extent_buffer *eb = NULL;
-
- spin_lock(&tree->lru_lock);
- eb = find_lru(tree, start, len);
- spin_unlock(&tree->lru_lock);
- if (eb) {
- return eb;
- }
+#if LEAK_DEBUG
+ unsigned long flags;
+#endif
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
- INIT_LIST_HEAD(&eb->lru);
eb->start = start;
eb->len = len;
+ spin_lock_init(&eb->lock);
+ init_waitqueue_head(&eb->lock_wq);
+
+#if LEAK_DEBUG
+ spin_lock_irqsave(&leak_lock, flags);
+ list_add(&eb->leak_list, &buffers);
+ spin_unlock_irqrestore(&leak_lock, flags);
+#endif
atomic_set(&eb->refs, 1);
return eb;
static void __free_extent_buffer(struct extent_buffer *eb)
{
+#if LEAK_DEBUG
+ unsigned long flags;
+ spin_lock_irqsave(&leak_lock, flags);
+ list_del(&eb->leak_list);
+ spin_unlock_irqrestore(&leak_lock, flags);
+#endif
kmem_cache_free(extent_buffer_cache, eb);
}
unsigned long i;
unsigned long index = start >> PAGE_CACHE_SHIFT;
struct extent_buffer *eb;
+ struct extent_buffer *exists = NULL;
struct page *p;
struct address_space *mapping = tree->mapping;
int uptodate = 1;
+ spin_lock(&tree->buffer_lock);
+ eb = buffer_search(tree, start);
+ if (eb) {
+ atomic_inc(&eb->refs);
+ spin_unlock(&tree->buffer_lock);
+ mark_page_accessed(eb->first_page);
+ return eb;
+ }
+ spin_unlock(&tree->buffer_lock);
+
eb = __alloc_extent_buffer(tree, start, len, mask);
- if (!eb || IS_ERR(eb))
+ if (!eb)
return NULL;
- if (eb->flags & EXTENT_BUFFER_FILLED)
- goto lru_add;
-
if (page0) {
eb->first_page = page0;
i = 1;
page_cache_get(page0);
mark_page_accessed(page0);
set_page_extent_mapped(page0);
- WARN_ON(!PageUptodate(page0));
set_page_extent_head(page0, len);
+ uptodate = PageUptodate(page0);
} else {
i = 0;
}
p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
if (!p) {
WARN_ON(1);
- goto fail;
+ goto free_eb;
}
set_page_extent_mapped(p);
mark_page_accessed(p);
unlock_page(p);
}
if (uptodate)
- eb->flags |= EXTENT_UPTODATE;
- eb->flags |= EXTENT_BUFFER_FILLED;
+ set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+
+ spin_lock(&tree->buffer_lock);
+ exists = buffer_tree_insert(tree, start, &eb->rb_node);
+ if (exists) {
+ /* add one reference for the caller */
+ atomic_inc(&exists->refs);
+ spin_unlock(&tree->buffer_lock);
+ goto free_eb;
+ }
+ spin_unlock(&tree->buffer_lock);
-lru_add:
- spin_lock(&tree->lru_lock);
- add_lru(tree, eb);
- spin_unlock(&tree->lru_lock);
+ /* add one reference for the tree */
+ atomic_inc(&eb->refs);
return eb;
-fail:
- spin_lock(&tree->lru_lock);
- list_del_init(&eb->lru);
- spin_unlock(&tree->lru_lock);
+free_eb:
if (!atomic_dec_and_test(&eb->refs))
- return NULL;
- for (index = 1; index < i; index++) {
+ return exists;
+ for (index = 1; index < i; index++)
page_cache_release(extent_buffer_page(eb, index));
- }
- if (i > 0)
- page_cache_release(extent_buffer_page(eb, 0));
+ page_cache_release(extent_buffer_page(eb, 0));
__free_extent_buffer(eb);
- return NULL;
+ return exists;
}
-EXPORT_SYMBOL(alloc_extent_buffer);
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len,
gfp_t mask)
{
- unsigned long num_pages = num_extent_pages(start, len);
- unsigned long i;
- unsigned long index = start >> PAGE_CACHE_SHIFT;
struct extent_buffer *eb;
- struct page *p;
- struct address_space *mapping = tree->mapping;
- int uptodate = 1;
-
- eb = __alloc_extent_buffer(tree, start, len, mask);
- if (!eb || IS_ERR(eb))
- return NULL;
-
- if (eb->flags & EXTENT_BUFFER_FILLED)
- goto lru_add;
-
- for (i = 0; i < num_pages; i++, index++) {
- p = find_lock_page(mapping, index);
- if (!p) {
- goto fail;
- }
- set_page_extent_mapped(p);
- mark_page_accessed(p);
- if (i == 0) {
- eb->first_page = p;
- set_page_extent_head(p, len);
- } else {
- set_page_private(p, EXTENT_PAGE_PRIVATE);
- }
+ spin_lock(&tree->buffer_lock);
+ eb = buffer_search(tree, start);
+ if (eb)
+ atomic_inc(&eb->refs);
+ spin_unlock(&tree->buffer_lock);
- if (!PageUptodate(p))
- uptodate = 0;
- unlock_page(p);
- }
- if (uptodate)
- eb->flags |= EXTENT_UPTODATE;
- eb->flags |= EXTENT_BUFFER_FILLED;
+ if (eb)
+ mark_page_accessed(eb->first_page);
-lru_add:
- spin_lock(&tree->lru_lock);
- add_lru(tree, eb);
- spin_unlock(&tree->lru_lock);
return eb;
-fail:
- spin_lock(&tree->lru_lock);
- list_del_init(&eb->lru);
- spin_unlock(&tree->lru_lock);
- if (!atomic_dec_and_test(&eb->refs))
- return NULL;
- for (index = 1; index < i; index++) {
- page_cache_release(extent_buffer_page(eb, index));
- }
- if (i > 0)
- page_cache_release(extent_buffer_page(eb, 0));
- __free_extent_buffer(eb);
- return NULL;
}
-EXPORT_SYMBOL(find_extent_buffer);
void free_extent_buffer(struct extent_buffer *eb)
{
- unsigned long i;
- unsigned long num_pages;
-
if (!eb)
return;
if (!atomic_dec_and_test(&eb->refs))
return;
- WARN_ON(!list_empty(&eb->lru));
- num_pages = num_extent_pages(eb->start, eb->len);
-
- for (i = 1; i < num_pages; i++) {
- page_cache_release(extent_buffer_page(eb, i));
- }
- page_cache_release(extent_buffer_page(eb, 0));
- __free_extent_buffer(eb);
+ WARN_ON(1);
}
-EXPORT_SYMBOL(free_extent_buffer);
int clear_extent_buffer_dirty(struct extent_io_tree *tree,
struct extent_buffer *eb)
{
- int set;
unsigned long i;
unsigned long num_pages;
struct page *page;
- u64 start = eb->start;
- u64 end = start + eb->len - 1;
-
- set = clear_extent_dirty(tree, start, end, GFP_NOFS);
num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
+ if (!PageDirty(page))
+ continue;
+
lock_page(page);
if (i == 0)
set_page_extent_head(page, eb->len);
else
set_page_private(page, EXTENT_PAGE_PRIVATE);
- /*
- * if we're on the last page or the first page and the
- * block isn't aligned on a page boundary, do extra checks
- * to make sure we don't clean page that is partially dirty
- */
- if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
- ((i == num_pages - 1) &&
- ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
- start = (u64)page->index << PAGE_CACHE_SHIFT;
- end = start + PAGE_CACHE_SIZE - 1;
- if (test_range_bit(tree, start, end,
- EXTENT_DIRTY, 0)) {
- unlock_page(page);
- continue;
- }
- }
clear_page_dirty_for_io(page);
- read_lock_irq(&page->mapping->tree_lock);
+ spin_lock_irq(&page->mapping->tree_lock);
if (!PageDirty(page)) {
radix_tree_tag_clear(&page->mapping->page_tree,
page_index(page),
PAGECACHE_TAG_DIRTY);
}
- read_unlock_irq(&page->mapping->tree_lock);
+ spin_unlock_irq(&page->mapping->tree_lock);
unlock_page(page);
}
return 0;
}
-EXPORT_SYMBOL(clear_extent_buffer_dirty);
int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
struct extent_buffer *eb)
return wait_on_extent_writeback(tree, eb->start,
eb->start + eb->len - 1);
}
-EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
int set_extent_buffer_dirty(struct extent_io_tree *tree,
struct extent_buffer *eb)
{
unsigned long i;
unsigned long num_pages;
+ int was_dirty = 0;
+ was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++) {
- struct page *page = extent_buffer_page(eb, i);
- /* writepage may need to do something special for the
- * first page, we have to make sure page->private is
- * properly set. releasepage may drop page->private
- * on us if the page isn't already dirty.
- */
- if (i == 0) {
- lock_page(page);
- set_page_extent_head(page, eb->len);
- } else if (PagePrivate(page) &&
- page->private != EXTENT_PAGE_PRIVATE) {
- lock_page(page);
- set_page_extent_mapped(page);
- unlock_page(page);
- }
+ for (i = 0; i < num_pages; i++)
__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
- if (i == 0)
- unlock_page(page);
+ return was_dirty;
+}
+
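+/*
+ * clear the uptodate flag on the extent buffer, on the range it covers
+ * in the io tree, and on every page backing it
+ */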
+int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
+ struct extent_buffer *eb)
+{
+ unsigned long i;
+ struct page *page;
+ unsigned long num_pages;
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+
+ clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+ GFP_NOFS);
+ for (i = 0; i < num_pages; i++) {
+ page = extent_buffer_page(eb, i);
+ if (page)
+ ClearPageUptodate(page);
}
- return set_extent_dirty(tree, eb->start,
- eb->start + eb->len - 1, GFP_NOFS);
+ return 0;
}
-EXPORT_SYMBOL(set_extent_buffer_dirty);
int set_extent_buffer_uptodate(struct extent_io_tree *tree,
struct extent_buffer *eb)
}
return 0;
}
-EXPORT_SYMBOL(set_extent_buffer_uptodate);
+
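+/*
+ * return 1 when [start, end] is fully uptodate, either because
+ * EXTENT_UPTODATE is set in the tree or because every page in the
+ * range is PageUptodate
+ */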
+int extent_range_uptodate(struct extent_io_tree *tree,
+ u64 start, u64 end)
+{
+ struct page *page;
+ int ret;
+ int pg_uptodate = 1;
+ int uptodate;
+ unsigned long index;
+
+ ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
+ if (ret)
+ return 1;
+ while (start <= end) {
+ index = start >> PAGE_CACHE_SHIFT;
+ page = find_get_page(tree->mapping, index);
+ uptodate = PageUptodate(page);
+ page_cache_release(page);
+ if (!uptodate) {
+ pg_uptodate = 0;
+ break;
+ }
+ start += PAGE_CACHE_SIZE;
+ }
+ return pg_uptodate;
+}
int extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb)
+ struct extent_buffer *eb)
{
- if (eb->flags & EXTENT_UPTODATE)
+ int ret = 0;
+ unsigned long num_pages;
+ unsigned long i;
+ struct page *page;
+ int pg_uptodate = 1;
+
+ if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 1;
- return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1);
+
+ ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+ EXTENT_UPTODATE, 1, NULL);
+ if (ret)
+ return ret;
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ for (i = 0; i < num_pages; i++) {
+ page = extent_buffer_page(eb, i);
+ if (!PageUptodate(page)) {
+ pg_uptodate = 0;
+ break;
+ }
+ }
+ return pg_uptodate;
}
-EXPORT_SYMBOL(extent_buffer_uptodate);
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb,
- u64 start,
- int wait)
+ u64 start, int wait,
+ get_extent_t *get_extent, int mirror_num)
{
unsigned long i;
unsigned long start_i;
struct page *page;
int err;
int ret = 0;
+ int locked_pages = 0;
+ int all_uptodate = 1;
+ int inc_all_pages = 0;
unsigned long num_pages;
+ struct bio *bio = NULL;
+ unsigned long bio_flags = 0;
- if (eb->flags & EXTENT_UPTODATE)
+ if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
- if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1)) {
+ if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+ EXTENT_UPTODATE, 1, NULL)) {
return 0;
}
num_pages = num_extent_pages(eb->start, eb->len);
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
- if (PageUptodate(page)) {
- continue;
- }
if (!wait) {
- if (TestSetPageLocked(page)) {
- continue;
- }
+ if (!trylock_page(page))
+ goto unlock_exit;
} else {
lock_page(page);
}
+ locked_pages++;
+ if (!PageUptodate(page))
+ all_uptodate = 0;
+ }
+ if (all_uptodate) {
+ if (start_i == 0)
+ set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+ goto unlock_exit;
+ }
+
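+ /*
+ * second pass: start reads for the pages that still aren't
+ * uptodate and unlock the ones that already are
+ */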
+ for (i = start_i; i < num_pages; i++) {
+ page = extent_buffer_page(eb, i);
+ if (inc_all_pages)
+ page_cache_get(page);
if (!PageUptodate(page)) {
- err = page->mapping->a_ops->readpage(NULL, page);
- if (err) {
+ if (start_i == 0)
+ inc_all_pages = 1;
+ ClearPageError(page);
+ err = __extent_read_full_page(tree, page,
+ get_extent, &bio,
+ mirror_num, &bio_flags);
+ if (err)
ret = err;
- }
} else {
unlock_page(page);
}
}
- if (ret || !wait) {
+ if (bio)
+ submit_one_bio(READ, bio, mirror_num, bio_flags);
+
+ if (ret || !wait)
return ret;
- }
+
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
wait_on_page_locked(page);
- if (!PageUptodate(page)) {
+ if (!PageUptodate(page))
ret = -EIO;
- }
}
+
if (!ret)
- eb->flags |= EXTENT_UPTODATE;
+ set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+ return ret;
+
+unlock_exit:
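+ /* no reads were started, just drop the page locks taken above */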
+ i = start_i;
+ while (locked_pages > 0) {
+ page = extent_buffer_page(eb, i);
+ i++;
+ unlock_page(page);
+ locked_pages--;
+ }
return ret;
}
-EXPORT_SYMBOL(read_extent_buffer_pages);
void read_extent_buffer(struct extent_buffer *eb, void *dstv,
unsigned long start,
char *dst = (char *)dstv;
size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
- unsigned long num_pages = num_extent_pages(eb->start, eb->len);
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
- while(len > 0) {
+ while (len > 0) {
page = extent_buffer_page(eb, i);
- if (!PageUptodate(page)) {
- printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
- WARN_ON(1);
- }
- WARN_ON(!PageUptodate(page));
cur = min(len, (PAGE_CACHE_SIZE - offset));
kaddr = kmap_atomic(page, KM_USER1);
i++;
}
}
-EXPORT_SYMBOL(read_extent_buffer);
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
unsigned long min_len, char **token, char **map,
offset = 0;
*map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
}
+
if (start + min_len > eb->len) {
-printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
+ printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
+ "wanted %lu %lu\n", (unsigned long long)eb->start,
+ eb->len, start, min_len);
WARN_ON(1);
}
p = extent_buffer_page(eb, i);
- WARN_ON(!PageUptodate(p));
kaddr = kmap_atomic(p, km);
*token = kaddr;
*map = kaddr + offset;
*map_len = PAGE_CACHE_SIZE - offset;
return 0;
}
-EXPORT_SYMBOL(map_private_extent_buffer);
int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
unsigned long min_len,
}
return err;
}
-EXPORT_SYMBOL(map_extent_buffer);
void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
{
kunmap_atomic(token, km);
}
-EXPORT_SYMBOL(unmap_extent_buffer);
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
unsigned long start,
offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
- while(len > 0) {
+ while (len > 0) {
page = extent_buffer_page(eb, i);
- WARN_ON(!PageUptodate(page));
cur = min(len, (PAGE_CACHE_SIZE - offset));
}
return ret;
}
-EXPORT_SYMBOL(memcmp_extent_buffer);
void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
unsigned long start, unsigned long len)
offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
- while(len > 0) {
+ while (len > 0) {
page = extent_buffer_page(eb, i);
WARN_ON(!PageUptodate(page));
i++;
}
}
-EXPORT_SYMBOL(write_extent_buffer);
void memset_extent_buffer(struct extent_buffer *eb, char c,
unsigned long start, unsigned long len)
offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
- while(len > 0) {
+ while (len > 0) {
page = extent_buffer_page(eb, i);
WARN_ON(!PageUptodate(page));
i++;
}
}
-EXPORT_SYMBOL(memset_extent_buffer);
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
unsigned long dst_offset, unsigned long src_offset,
offset = (start_offset + dst_offset) &
((unsigned long)PAGE_CACHE_SIZE - 1);
- while(len > 0) {
+ while (len > 0) {
page = extent_buffer_page(dst, i);
WARN_ON(!PageUptodate(page));
i++;
}
}
-EXPORT_SYMBOL(copy_extent_buffer);
static void move_pages(struct page *dst_page, struct page *src_page,
unsigned long dst_off, unsigned long src_off,
unsigned long src_i;
if (src_offset + len > dst->len) {
- printk("memmove bogus src_offset %lu move len %lu len %lu\n",
- src_offset, len, dst->len);
+ printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
+ "len %lu dst len %lu\n", src_offset, len, dst->len);
BUG_ON(1);
}
if (dst_offset + len > dst->len) {
- printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
- dst_offset, len, dst->len);
+ printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
+ "len %lu dst len %lu\n", dst_offset, len, dst->len);
BUG_ON(1);
}
- while(len > 0) {
+ while (len > 0) {
dst_off_in_page = (start_offset + dst_offset) &
((unsigned long)PAGE_CACHE_SIZE - 1);
src_off_in_page = (start_offset + src_offset) &
len -= cur;
}
}
-EXPORT_SYMBOL(memcpy_extent_buffer);
void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len)
unsigned long src_i;
if (src_offset + len > dst->len) {
- printk("memmove bogus src_offset %lu move len %lu len %lu\n",
- src_offset, len, dst->len);
+ printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
+ "len %lu len %lu\n", src_offset, len, dst->len);
BUG_ON(1);
}
if (dst_offset + len > dst->len) {
- printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
- dst_offset, len, dst->len);
+ printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
+ "len %lu len %lu\n", dst_offset, len, dst->len);
BUG_ON(1);
}
if (dst_offset < src_offset) {
memcpy_extent_buffer(dst, dst_offset, src_offset, len);
return;
}
- while(len > 0) {
+ while (len > 0) {
dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
len -= cur;
}
}
-EXPORT_SYMBOL(memmove_extent_buffer);
+
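+/*
+ * helper for releasepage: free the extent buffer attached to this page
+ * if nothing else holds a reference and it isn't dirty
+ */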
+int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
+{
+ u64 start = page_offset(page);
+ struct extent_buffer *eb;
+ int ret = 1;
+ unsigned long i;
+ unsigned long num_pages;
+
+ spin_lock(&tree->buffer_lock);
+ eb = buffer_search(tree, start);
+ if (!eb)
+ goto out;
+
+ if (atomic_read(&eb->refs) > 1) {
+ ret = 0;
+ goto out;
+ }
+ if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+ ret = 0;
+ goto out;
+ }
+ /* at this point we can safely release the extent buffer */
+ num_pages = num_extent_pages(eb->start, eb->len);
+ for (i = 0; i < num_pages; i++)
+ page_cache_release(extent_buffer_page(eb, i));
+ rb_erase(&eb->rb_node, &tree->buffer);
+ __free_extent_buffer(eb);
+out:
+ spin_unlock(&tree->buffer_lock);
+ return ret;
+}