1 #include <linux/bitops.h>
2 #include <linux/slab.h>
6 #include <linux/pagemap.h>
7 #include <linux/page-flags.h>
8 #include <linux/module.h>
9 #include <linux/spinlock.h>
10 #include <linux/blkdev.h>
11 #include "extent_map.h"
13 /* temporary define until extent_map moves out of btrfs */
14 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
15 unsigned long extra_flags,
16 void (*ctor)(void *, struct kmem_cache *,
19 static struct kmem_cache *extent_map_cache;
20 static struct kmem_cache *extent_state_cache;
26 struct rb_node rb_node;
29 void __init extent_map_init(void)
31 extent_map_cache = btrfs_cache_create("extent_map",
32 sizeof(struct extent_map),
35 extent_state_cache = btrfs_cache_create("extent_state",
36 sizeof(struct extent_state),
41 void __exit extent_map_exit(void)
44 kmem_cache_destroy(extent_map_cache);
45 if (extent_state_cache)
46 kmem_cache_destroy(extent_state_cache);
49 void extent_map_tree_init(struct extent_map_tree *tree,
50 struct address_space *mapping, gfp_t mask)
52 tree->map.rb_node = NULL;
53 tree->state.rb_node = NULL;
55 rwlock_init(&tree->lock);
56 tree->mapping = mapping;
58 EXPORT_SYMBOL(extent_map_tree_init);
60 struct extent_map *alloc_extent_map(gfp_t mask)
62 struct extent_map *em;
63 em = kmem_cache_alloc(extent_map_cache, mask);
64 if (!em || IS_ERR(em))
67 atomic_set(&em->refs, 1);
70 EXPORT_SYMBOL(alloc_extent_map);
72 void free_extent_map(struct extent_map *em)
76 if (atomic_dec_and_test(&em->refs)) {
78 kmem_cache_free(extent_map_cache, em);
81 EXPORT_SYMBOL(free_extent_map);
84 struct extent_state *alloc_extent_state(gfp_t mask)
86 struct extent_state *state;
87 state = kmem_cache_alloc(extent_state_cache, mask);
88 if (!state || IS_ERR(state))
93 atomic_set(&state->refs, 1);
94 init_waitqueue_head(&state->wq);
97 EXPORT_SYMBOL(alloc_extent_state);
99 void free_extent_state(struct extent_state *state)
103 if (atomic_dec_and_test(&state->refs)) {
104 WARN_ON(state->in_tree);
105 kmem_cache_free(extent_state_cache, state);
108 EXPORT_SYMBOL(free_extent_state);
110 static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
111 struct rb_node *node)
113 struct rb_node **p = &root->rb_node;
114 struct rb_node *parent = NULL;
115 struct tree_entry *entry;
119 entry = rb_entry(parent, struct tree_entry, rb_node);
121 if (offset < entry->start)
123 else if (offset > entry->end)
129 entry = rb_entry(node, struct tree_entry, rb_node);
131 rb_link_node(node, parent, p);
132 rb_insert_color(node, root);
136 static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
137 struct rb_node **prev_ret)
139 struct rb_node *n = root->rb_node;
140 struct rb_node *prev = NULL;
141 struct tree_entry *entry;
142 struct tree_entry *prev_entry = NULL;
145 entry = rb_entry(n, struct tree_entry, rb_node);
149 if (offset < entry->start)
151 else if (offset > entry->end)
158 while (prev && offset > prev_entry->end) {
159 prev = rb_next(prev);
160 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
166 static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
168 struct rb_node *prev;
170 ret = __tree_search(root, offset, &prev);
176 static int tree_delete(struct rb_root *root, u64 offset)
178 struct rb_node *node;
179 struct tree_entry *entry;
181 node = __tree_search(root, offset, NULL);
184 entry = rb_entry(node, struct tree_entry, rb_node);
186 rb_erase(node, root);
191 * add_extent_mapping tries a simple backward merge with existing
192 * mappings. The extent_map struct passed in will be inserted into
193 * the tree directly (no copies made, just a reference taken).
195 int add_extent_mapping(struct extent_map_tree *tree,
196 struct extent_map *em)
199 struct extent_map *prev = NULL;
202 write_lock_irq(&tree->lock);
203 rb = tree_insert(&tree->map, em->end, &em->rb_node);
205 prev = rb_entry(rb, struct extent_map, rb_node);
206 printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end);
210 atomic_inc(&em->refs);
211 if (em->start != 0) {
212 rb = rb_prev(&em->rb_node);
214 prev = rb_entry(rb, struct extent_map, rb_node);
215 if (prev && prev->end + 1 == em->start &&
216 ((em->block_start == EXTENT_MAP_HOLE &&
217 prev->block_start == EXTENT_MAP_HOLE) ||
218 (em->block_start == prev->block_end + 1))) {
219 em->start = prev->start;
220 em->block_start = prev->block_start;
221 rb_erase(&prev->rb_node, &tree->map);
223 free_extent_map(prev);
227 write_unlock_irq(&tree->lock);
230 EXPORT_SYMBOL(add_extent_mapping);
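/*
 * Illustrative usage sketch (not part of this file): how a caller might
 * build and insert a mapping.  'tree' and the byte/block ranges below are
 * assumed values.  add_extent_mapping() takes its own reference on success,
 * so the caller eventually drops the reference it got from
 * alloc_extent_map(); on a collision with an existing mapping the call
 * fails (the exact errno is outside this excerpt).
 *
 *	struct extent_map *em = alloc_extent_map(GFP_NOFS);
 *	if (!em)
 *		return -ENOMEM;
 *	em->start = start;
 *	em->end = start + len - 1;
 *	em->block_start = disk_block_start;
 *	em->block_end = disk_block_start + len - 1;
 *	ret = add_extent_mapping(tree, em);
 *	free_extent_map(em);
 */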
233 * lookup_extent_mapping returns the first extent_map struct in the
234 * tree that intersects the [start, end] (inclusive) range. There may
235 * be additional objects in the tree that intersect, so check the object
236 * returned carefully to make sure you don't need additional lookups.
238 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
241 struct extent_map *em;
242 struct rb_node *rb_node;
244 read_lock_irq(&tree->lock);
245 rb_node = tree_search(&tree->map, start);
250 if (IS_ERR(rb_node)) {
251 em = ERR_PTR(PTR_ERR(rb_node));
254 em = rb_entry(rb_node, struct extent_map, rb_node);
255 if (em->end < start || em->start > end) {
259 atomic_inc(&em->refs);
261 read_unlock_irq(&tree->lock);
264 EXPORT_SYMBOL(lookup_extent_mapping);
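/*
 * Illustrative sketch (hypothetical caller): because the mapping returned
 * above is only guaranteed to intersect [start, end], a caller covering a
 * larger range clamps to the overlap, advances, and drops the reference the
 * lookup took:
 *
 *	u64 cur = start;
 *	while (cur <= end) {
 *		struct extent_map *em;
 *
 *		em = lookup_extent_mapping(tree, cur, end);
 *		if (!em || IS_ERR(em))
 *			break;
 *		... use the overlap [max(cur, em->start), min(end, em->end)] ...
 *		cur = em->end + 1;
 *		free_extent_map(em);
 *	}
 */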
267 * removes an extent_map struct from the tree. No reference counts are
268 * dropped, and no checks are done to see if the range is in use
270 int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
274 write_lock_irq(&tree->lock);
275 ret = tree_delete(&tree->map, em->end);
276 write_unlock_irq(&tree->lock);
279 EXPORT_SYMBOL(remove_extent_mapping);
282 * utility function to look for merge candidates inside a given range.
283 * Any extents with matching state are merged together into a single
284 * extent in the tree. Extents with EXTENT_IOBITS set in their state field
285 * are not merged because the end_io handlers need to be able to do
286 * operations on them without sleeping (or doing allocations/splits).
288 * This should be called with the tree lock held.
290 static int merge_state(struct extent_map_tree *tree,
291 struct extent_state *state)
293 struct extent_state *other;
294 struct rb_node *other_node;
296 if (state->state & EXTENT_IOBITS)
299 other_node = rb_prev(&state->rb_node);
301 other = rb_entry(other_node, struct extent_state, rb_node);
302 if (other->end == state->start - 1 &&
303 other->state == state->state) {
304 state->start = other->start;
306 rb_erase(&other->rb_node, &tree->state);
307 free_extent_state(other);
310 other_node = rb_next(&state->rb_node);
312 other = rb_entry(other_node, struct extent_state, rb_node);
313 if (other->start == state->end + 1 &&
314 other->state == state->state) {
315 other->start = state->start;
317 rb_erase(&state->rb_node, &tree->state);
318 free_extent_state(state);
325 * insert an extent_state struct into the tree. 'bits' are set on the
326 * struct before it is inserted.
328 * This may return -EEXIST if the extent is already there, in which case the
329 * state struct is freed.
331 * The tree lock is not taken internally. This is a utility function and
332 * probably isn't what you want to call (see set/clear_extent_bit).
334 static int insert_state(struct extent_map_tree *tree,
335 struct extent_state *state, u64 start, u64 end,
338 struct rb_node *node;
341 printk("end < start %Lu %Lu\n", end, start);
344 state->state |= bits;
345 state->start = start;
347 if ((end & 4095) == 0) {
348 printk("insert state %Lu %Lu strange end\n", start, end);
351 node = tree_insert(&tree->state, end, &state->rb_node);
353 struct extent_state *found;
354 found = rb_entry(node, struct extent_state, rb_node);
355 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
356 free_extent_state(state);
359 merge_state(tree, state);
364 * split a given extent state struct in two, inserting the preallocated
365 * struct 'prealloc' as the newly created first half. 'split' indicates an
366 * offset inside 'orig' where it should be split.
369 * the tree has 'orig' at [orig->start, orig->end]. After calling, there
370 * are two extent state structs in the tree:
371 * prealloc: [orig->start, split - 1]
372 * orig: [ split, orig->end ]
374 * The tree locks are not taken by this function. They need to be held
377 static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
378 struct extent_state *prealloc, u64 split)
380 struct rb_node *node;
381 prealloc->start = orig->start;
382 prealloc->end = split - 1;
383 prealloc->state = orig->state;
385 if ((prealloc->end & 4095) == 0) {
386 printk("insert state %Lu %Lu strange end\n", prealloc->start,
390 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
392 struct extent_state *found;
393 found = rb_entry(node, struct extent_state, rb_node);
394 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
395 free_extent_state(prealloc);
402 * utility function to clear some bits in an extent state struct.
403 * it will optionally wake up anyone waiting on this state (wake == 1), or
404 * forcibly remove the state from the tree (delete == 1).
406 * If no bits are set on the state struct after clearing things, the
407 * struct is freed and removed from the tree
409 static int clear_state_bit(struct extent_map_tree *tree,
410 struct extent_state *state, int bits, int wake,
413 int ret = state->state & bits;
414 state->state &= ~bits;
417 if (delete || state->state == 0) {
418 if (state->in_tree) {
419 rb_erase(&state->rb_node, &tree->state);
421 free_extent_state(state);
426 merge_state(tree, state);
432 * clear some bits on a range in the tree. This may require splitting
433 * or inserting elements in the tree, so the gfp mask is used to
434 * indicate which allocations or sleeping are allowed.
436 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
437 * the given range from the tree regardless of state (i.e. for truncate).
439 * the range [start, end] is inclusive.
441 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
442 * bits were already set, or zero if none of the bits were already set.
444 int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
445 int bits, int wake, int delete, gfp_t mask)
447 struct extent_state *state;
448 struct extent_state *prealloc = NULL;
449 struct rb_node *node;
455 if (!prealloc && (mask & __GFP_WAIT)) {
456 prealloc = alloc_extent_state(mask);
461 write_lock_irqsave(&tree->lock, flags);
463 * this search will find the extents that end after
466 node = tree_search(&tree->state, start);
469 state = rb_entry(node, struct extent_state, rb_node);
470 if (state->start > end)
472 WARN_ON(state->end < start);
475 * | ---- desired range ---- |
477 * | ------------- state -------------- |
479 * We need to split the extent we found, and may flip
480 * bits on second half.
482 * If the extent we found extends past our range, we
483 * just split and search again. It'll get split again
484 * the next time though.
486 * If the extent we found is inside our range, we clear
487 * the desired bit on it.
490 if (state->start < start) {
491 err = split_state(tree, state, prealloc, start);
492 BUG_ON(err == -EEXIST);
496 if (state->end <= end) {
497 start = state->end + 1;
498 set |= clear_state_bit(tree, state, bits,
501 start = state->start;
506 * | ---- desired range ---- |
508 * We need to split the extent, and clear the bit
511 if (state->start <= end && state->end > end) {
512 err = split_state(tree, state, prealloc, end + 1);
513 BUG_ON(err == -EEXIST);
517 set |= clear_state_bit(tree, prealloc, bits,
523 start = state->end + 1;
524 set |= clear_state_bit(tree, state, bits, wake, delete);
528 write_unlock_irqrestore(&tree->lock, flags);
530 free_extent_state(prealloc);
537 write_unlock_irqrestore(&tree->lock, flags);
538 if (mask & __GFP_WAIT)
542 EXPORT_SYMBOL(clear_extent_bit);
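/*
 * Illustrative sketch (hypothetical caller): a truncate-style path can use
 * delete == 1 to drop every state record past the new EOF no matter which
 * bits are set, and wake == 1 so anyone sleeping on those records is kicked.
 * 'new_size' and the bit mask are assumptions for the example:
 *
 *	clear_extent_bit(tree, new_size, (u64)-1,
 *			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
 *			 1, 1, GFP_NOFS);
 */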
544 static int wait_on_state(struct extent_map_tree *tree,
545 struct extent_state *state)
548 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
549 read_unlock_irq(&tree->lock);
551 read_lock_irq(&tree->lock);
552 finish_wait(&state->wq, &wait);
557 * waits for one or more bits to clear on a range in the state tree.
558 * The range [start, end] is inclusive.
559 * The tree lock is taken by this function
561 int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
563 struct extent_state *state;
564 struct rb_node *node;
566 read_lock_irq(&tree->lock);
570 * this search will find all the extents that end after
573 node = tree_search(&tree->state, start);
577 state = rb_entry(node, struct extent_state, rb_node);
579 if (state->start > end)
582 if (state->state & bits) {
583 start = state->start;
584 atomic_inc(&state->refs);
585 wait_on_state(tree, state);
586 free_extent_state(state);
589 start = state->end + 1;
594 if (need_resched()) {
595 read_unlock_irq(&tree->lock);
597 read_lock_irq(&tree->lock);
601 read_unlock_irq(&tree->lock);
604 EXPORT_SYMBOL(wait_extent_bit);
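/*
 * Illustrative sketch (hypothetical caller): waiting for in-flight writeback
 * to drain from a byte range before reusing it.  'start' and 'len' are
 * assumed values; the call sleeps until no EXTENT_WRITEBACK bits remain in
 * the (inclusive) range:
 *
 *	wait_extent_bit(tree, start, start + len - 1, EXTENT_WRITEBACK);
 */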
607 * set some bits on a range in the tree. This may require allocations
608 * or sleeping, so the gfp mask is used to indicate what is allowed.
610 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
611 * range already has the desired bits set. The start of the existing
612 * range is returned in failed_start in this case.
614 * [start, end] is inclusive
615 * This takes the tree lock.
617 int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
618 int exclusive, u64 *failed_start, gfp_t mask)
620 struct extent_state *state;
621 struct extent_state *prealloc = NULL;
622 struct rb_node *node;
629 if (!prealloc && (mask & __GFP_WAIT)) {
630 prealloc = alloc_extent_state(mask);
635 write_lock_irqsave(&tree->lock, flags);
637 * this search will find all the extents that end after
640 node = tree_search(&tree->state, start);
642 err = insert_state(tree, prealloc, start, end, bits);
644 BUG_ON(err == -EEXIST);
648 state = rb_entry(node, struct extent_state, rb_node);
649 last_start = state->start;
650 last_end = state->end;
653 * | ---- desired range ---- |
656 * Just lock what we found and keep going
658 if (state->start == start && state->end <= end) {
659 set = state->state & bits;
660 if (set && exclusive) {
661 *failed_start = state->start;
665 state->state |= bits;
666 start = state->end + 1;
667 merge_state(tree, state);
672 * | ---- desired range ---- |
675 * | ------------- state -------------- |
677 * We need to split the extent we found, and may flip bits on
680 * If the extent we found extends past our
681 * range, we just split and search again. It'll get split
682 * again the next time though.
684 * If the extent we found is inside our range, we set the
687 if (state->start < start) {
688 set = state->state & bits;
689 if (exclusive && set) {
690 *failed_start = start;
694 err = split_state(tree, state, prealloc, start);
695 BUG_ON(err == -EEXIST);
699 if (state->end <= end) {
700 state->state |= bits;
701 start = state->end + 1;
702 merge_state(tree, state);
704 start = state->start;
709 * | ---- desired range ---- |
710 * | state | or | state |
712 * There's a hole, we need to insert something in it and
713 * ignore the extent we found.
715 if (state->start > start) {
717 if (end < last_start)
720 this_end = last_start - 1;
721 err = insert_state(tree, prealloc, start, this_end,
724 BUG_ON(err == -EEXIST);
727 start = this_end + 1;
731 * | ---- desired range ---- |
733 * We need to split the extent, and set the bit
736 if (state->start <= end && state->end > end) {
737 set = state->state & bits;
738 if (exclusive && set) {
739 *failed_start = start;
743 err = split_state(tree, state, prealloc, end + 1);
744 BUG_ON(err == -EEXIST);
746 prealloc->state |= bits;
747 merge_state(tree, prealloc);
755 write_unlock_irqrestore(&tree->lock, flags);
757 free_extent_state(prealloc);
764 write_unlock_irqrestore(&tree->lock, flags);
765 if (mask & __GFP_WAIT)
769 EXPORT_SYMBOL(set_extent_bit);
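/*
 * Illustrative sketch (hypothetical caller): with exclusive == 1 this acts
 * like a try-lock.  If part of the range already carries the bit, the call
 * fails with -EEXIST and 'failed_start' reports where the conflict begins,
 * which is how lock_extent() below knows where to wait before retrying:
 *
 *	u64 failed_start;
 *	err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
 *			     &failed_start, GFP_NOFS);
 *	if (err == -EEXIST)
 *		... wait on [failed_start, end], then retry from failed_start ...
 */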
771 /* wrappers around set/clear extent bit */
772 int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
775 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
778 EXPORT_SYMBOL(set_extent_dirty);
780 int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
783 return set_extent_bit(tree, start, end,
784 EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
787 EXPORT_SYMBOL(set_extent_delalloc);
789 int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
792 return clear_extent_bit(tree, start, end,
793 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
795 EXPORT_SYMBOL(clear_extent_dirty);
797 int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
800 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
803 EXPORT_SYMBOL(set_extent_new);
805 int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
808 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
810 EXPORT_SYMBOL(clear_extent_new);
812 int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
815 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
818 EXPORT_SYMBOL(set_extent_uptodate);
820 int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
823 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
825 EXPORT_SYMBOL(clear_extent_uptodate);
827 int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
830 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
833 EXPORT_SYMBOL(set_extent_writeback);
835 int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
838 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
840 EXPORT_SYMBOL(clear_extent_writeback);
842 int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
844 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
846 EXPORT_SYMBOL(wait_on_extent_writeback);
849 * locks a range in ascending order, waiting for any locked regions
850 * it hits on the way. [start,end] are inclusive, and this will sleep.
852 int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
857 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
858 &failed_start, mask);
859 if (err == -EEXIST && (mask & __GFP_WAIT)) {
860 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
861 start = failed_start;
865 WARN_ON(start > end);
869 EXPORT_SYMBOL(lock_extent);
871 int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
874 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
876 EXPORT_SYMBOL(unlock_extent);
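/*
 * Illustrative sketch (hypothetical caller): the usual pattern is to bracket
 * an I/O with lock_extent()/unlock_extent().  Both ends are inclusive, so a
 * single page at byte offset 'pos' covers [pos, pos + PAGE_CACHE_SIZE - 1]:
 *
 *	lock_extent(tree, pos, pos + PAGE_CACHE_SIZE - 1, GFP_NOFS);
 *	... read or write the locked range ...
 *	unlock_extent(tree, pos, pos + PAGE_CACHE_SIZE - 1, GFP_NOFS);
 */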
879 * helper function to set pages and extents in the tree dirty
881 int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
883 unsigned long index = start >> PAGE_CACHE_SHIFT;
884 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
887 while (index <= end_index) {
888 page = find_get_page(tree->mapping, index);
890 __set_page_dirty_nobuffers(page);
891 page_cache_release(page);
894 set_extent_dirty(tree, start, end, GFP_NOFS);
897 EXPORT_SYMBOL(set_range_dirty);
900 * helper function to set both pages and extents in the tree writeback
902 int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
904 unsigned long index = start >> PAGE_CACHE_SHIFT;
905 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
908 while (index <= end_index) {
909 page = find_get_page(tree->mapping, index);
911 set_page_writeback(page);
912 page_cache_release(page);
915 set_extent_writeback(tree, start, end, GFP_NOFS);
918 EXPORT_SYMBOL(set_range_writeback);
920 int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
921 u64 *start_ret, u64 *end_ret, int bits)
923 struct rb_node *node;
924 struct extent_state *state;
927 write_lock_irq(&tree->lock);
929 * this search will find all the extents that end after
932 node = tree_search(&tree->state, start);
933 if (!node || IS_ERR(node)) {
938 state = rb_entry(node, struct extent_state, rb_node);
939 if (state->state & bits) {
940 *start_ret = state->start;
941 *end_ret = state->end;
944 node = rb_next(node);
949 write_unlock_irq(&tree->lock);
952 EXPORT_SYMBOL(find_first_extent_bit);
954 u64 find_lock_delalloc_range(struct extent_map_tree *tree,
955 u64 start, u64 lock_start, u64 *end, u64 max_bytes)
957 struct rb_node *node;
958 struct extent_state *state;
959 u64 cur_start = start;
963 write_lock_irq(&tree->lock);
965 * this search will find all the extents that end after
969 node = tree_search(&tree->state, cur_start);
970 if (!node || IS_ERR(node)) {
975 state = rb_entry(node, struct extent_state, rb_node);
976 if (state->start != cur_start) {
979 if (!(state->state & EXTENT_DELALLOC)) {
982 if (state->start >= lock_start) {
983 if (state->state & EXTENT_LOCKED) {
985 atomic_inc(&state->refs);
986 write_unlock_irq(&tree->lock);
988 write_lock_irq(&tree->lock);
989 finish_wait(&state->wq, &wait);
990 free_extent_state(state);
993 state->state |= EXTENT_LOCKED;
997 cur_start = state->end + 1;
998 node = rb_next(node);
1001 total_bytes = state->end - state->start + 1;
1002 if (total_bytes >= max_bytes)
1006 write_unlock_irq(&tree->lock);
1011 * helper function to lock both pages and extents in the tree.
1012 * pages must be locked first.
1014 int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
1016 unsigned long index = start >> PAGE_CACHE_SHIFT;
1017 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1021 while (index <= end_index) {
1022 page = grab_cache_page(tree->mapping, index);
1028 err = PTR_ERR(page);
1033 lock_extent(tree, start, end, GFP_NOFS);
1038 * we failed above in getting the page at 'index', so we undo here
1039 * up to but not including the page at 'index'
1042 index = start >> PAGE_CACHE_SHIFT;
1043 while (index < end_index) {
1044 page = find_get_page(tree->mapping, index);
1046 page_cache_release(page);
1051 EXPORT_SYMBOL(lock_range);
1054 * helper function to unlock both pages and extents in the tree.
1056 int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
1058 unsigned long index = start >> PAGE_CACHE_SHIFT;
1059 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1062 while (index <= end_index) {
1063 page = find_get_page(tree->mapping, index);
1065 page_cache_release(page);
1068 unlock_extent(tree, start, end, GFP_NOFS);
1071 EXPORT_SYMBOL(unlock_range);
1073 int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
1075 struct rb_node *node;
1076 struct extent_state *state;
1079 write_lock_irq(&tree->lock);
1081 * this search will find all the extents that end after
1084 node = tree_search(&tree->state, start);
1085 if (!node || IS_ERR(node)) {
1089 state = rb_entry(node, struct extent_state, rb_node);
1090 if (state->start != start) {
1094 state->private = private;
1096 write_unlock_irq(&tree->lock);
1101 int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
1103 struct rb_node *node;
1104 struct extent_state *state;
1107 read_lock_irq(&tree->lock);
1109 * this search will find all the extents that end after
1112 node = tree_search(&tree->state, start);
1113 if (!node || IS_ERR(node)) {
1117 state = rb_entry(node, struct extent_state, rb_node);
1118 if (state->start != start) {
1122 *private = state->private;
1124 read_unlock_irq(&tree->lock);
1129 * searches a range in the state tree for a given mask.
1130 * If 'filled' == 1, this returns 1 only if every extent in the tree
1131 * has the bits set. Otherwise, 1 is returned if any bit in the
1132 * range is found set.
1134 static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
1135 int bits, int filled)
1137 struct extent_state *state = NULL;
1138 struct rb_node *node;
1141 read_lock_irq(&tree->lock);
1142 node = tree_search(&tree->state, start);
1143 while (node && start <= end) {
1144 state = rb_entry(node, struct extent_state, rb_node);
1145 if (state->start > end)
1148 if (filled && state->start > start) {
1152 if (state->state & bits) {
1156 } else if (filled) {
1160 start = state->end + 1;
1163 node = rb_next(node);
1165 read_unlock_irq(&tree->lock);
1170 * helper function to set a given page up to date if all the
1171 * extents in the tree for that page are up to date
1173 static int check_page_uptodate(struct extent_map_tree *tree,
1176 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1177 u64 end = start + PAGE_CACHE_SIZE - 1;
1178 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1179 SetPageUptodate(page);
1184 * helper function to unlock a page if all the extents in the tree
1185 * for that page are unlocked
1187 static int check_page_locked(struct extent_map_tree *tree,
1190 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1191 u64 end = start + PAGE_CACHE_SIZE - 1;
1192 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1198 * helper function to end page writeback if all the extents
1199 * in the tree for that page are done with writeback
1201 static int check_page_writeback(struct extent_map_tree *tree,
1204 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1205 u64 end = start + PAGE_CACHE_SIZE - 1;
1206 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1207 end_page_writeback(page);
1211 /* lots and lots of room for performance fixes in the end_bio funcs */
1214 * after a writepage IO is done, we need to:
1215 * clear the uptodate bits on error
1216 * clear the writeback bits in the extent tree for this IO
1217 * end_page_writeback if the page has no more pending IO
1219 * Scheduling is not allowed, so the extent state tree is expected
1220 * to have one and only one object corresponding to this IO.
1222 static int end_bio_extent_writepage(struct bio *bio,
1223 unsigned int bytes_done, int err)
1225 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1226 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1227 struct extent_map_tree *tree = bio->bi_private;
1236 struct page *page = bvec->bv_page;
1237 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1238 end = start + bvec->bv_len - 1;
1240 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1245 if (--bvec >= bio->bi_io_vec)
1246 prefetchw(&bvec->bv_page->flags);
1249 clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1250 ClearPageUptodate(page);
1253 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1256 end_page_writeback(page);
1258 check_page_writeback(tree, page);
1259 if (tree->ops && tree->ops->writepage_end_io_hook)
1260 tree->ops->writepage_end_io_hook(page, start, end);
1261 } while (bvec >= bio->bi_io_vec);
1268 * after a readpage IO is done, we need to:
1269 * clear the uptodate bits on error
1270 * set the uptodate bits if things worked
1271 * set the page up to date if all extents in the tree are uptodate
1272 * clear the lock bit in the extent tree
1273 * unlock the page if there are no other extents locked for it
1275 * Scheduling is not allowed, so the extent state tree is expected
1276 * to have one and only one object corresponding to this IO.
1278 static int end_bio_extent_readpage(struct bio *bio,
1279 unsigned int bytes_done, int err)
1281 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1282 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1283 struct extent_map_tree *tree = bio->bi_private;
1293 struct page *page = bvec->bv_page;
1294 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1295 end = start + bvec->bv_len - 1;
1297 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1302 if (--bvec >= bio->bi_io_vec)
1303 prefetchw(&bvec->bv_page->flags);
1305 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1306 ret = tree->ops->readpage_end_io_hook(page, start, end);
1311 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1313 SetPageUptodate(page);
1315 check_page_uptodate(tree, page);
1317 ClearPageUptodate(page);
1321 unlock_extent(tree, start, end, GFP_ATOMIC);
1326 check_page_locked(tree, page);
1327 } while (bvec >= bio->bi_io_vec);
1334 * IO done from prepare_write is pretty simple: we just unlock
1335 * the structs in the extent tree when done, and set the uptodate bits
1338 static int end_bio_extent_preparewrite(struct bio *bio,
1339 unsigned int bytes_done, int err)
1341 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1342 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1343 struct extent_map_tree *tree = bio->bi_private;
1351 struct page *page = bvec->bv_page;
1352 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1353 end = start + bvec->bv_len - 1;
1355 if (--bvec >= bio->bi_io_vec)
1356 prefetchw(&bvec->bv_page->flags);
1359 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1361 ClearPageUptodate(page);
1365 unlock_extent(tree, start, end, GFP_ATOMIC);
1367 } while (bvec >= bio->bi_io_vec);
1373 static int submit_extent_page(int rw, struct extent_map_tree *tree,
1374 struct page *page, sector_t sector,
1375 size_t size, unsigned long offset,
1376 struct block_device *bdev,
1377 bio_end_io_t end_io_func)
1382 bio = bio_alloc(GFP_NOIO, 1);
1384 bio->bi_sector = sector;
1385 bio->bi_bdev = bdev;
1386 bio->bi_io_vec[0].bv_page = page;
1387 bio->bi_io_vec[0].bv_len = size;
1388 bio->bi_io_vec[0].bv_offset = offset;
1392 bio->bi_size = size;
1394 bio->bi_end_io = end_io_func;
1395 bio->bi_private = tree;
1398 submit_bio(rw, bio);
1400 if (bio_flagged(bio, BIO_EOPNOTSUPP))
1407 void set_page_extent_mapped(struct page *page)
1409 if (!PagePrivate(page)) {
1410 SetPagePrivate(page);
1411 WARN_ON(!page->mapping->a_ops->invalidatepage);
1412 set_page_private(page, 1);
1413 page_cache_get(page);
1418 * basic readpage implementation. Locked extent state structs are inserted
1419 * into the tree and are removed when the IO is done (by the end_io
1422 int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
1423 get_extent_t *get_extent)
1425 struct inode *inode = page->mapping->host;
1426 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1427 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1431 u64 last_byte = i_size_read(inode);
1435 struct extent_map *em;
1436 struct block_device *bdev;
1439 size_t page_offset = 0;
1441 size_t blocksize = inode->i_sb->s_blocksize;
1443 set_page_extent_mapped(page);
1446 lock_extent(tree, start, end, GFP_NOFS);
1448 while (cur <= end) {
1449 if (cur >= last_byte) {
1450 iosize = PAGE_CACHE_SIZE - page_offset;
1451 zero_user_page(page, page_offset, iosize, KM_USER0);
1452 set_extent_uptodate(tree, cur, cur + iosize - 1,
1454 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1457 em = get_extent(inode, page, page_offset, cur, end, 0);
1458 if (IS_ERR(em) || !em) {
1460 unlock_extent(tree, cur, end, GFP_NOFS);
1464 extent_offset = cur - em->start;
1465 BUG_ON(em->end < cur);
1468 iosize = min(em->end - cur, end - cur) + 1;
1469 cur_end = min(em->end, end);
1470 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1471 sector = (em->block_start + extent_offset) >> 9;
1473 block_start = em->block_start;
1474 free_extent_map(em);
1477 /* we've found a hole, just zero and go on */
1478 if (block_start == EXTENT_MAP_HOLE) {
1479 zero_user_page(page, page_offset, iosize, KM_USER0);
1480 set_extent_uptodate(tree, cur, cur + iosize - 1,
1482 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1484 page_offset += iosize;
1487 /* the get_extent function already copied into the page */
1488 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
1489 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1491 page_offset += iosize;
1496 if (tree->ops && tree->ops->readpage_io_hook) {
1497 ret = tree->ops->readpage_io_hook(page, cur,
1501 ret = submit_extent_page(READ, tree, page,
1502 sector, iosize, page_offset,
1503 bdev, end_bio_extent_readpage);
1508 page_offset += iosize;
1512 if (!PageError(page))
1513 SetPageUptodate(page);
1518 EXPORT_SYMBOL(extent_read_full_page);
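/*
 * Illustrative sketch (not part of this file): how a filesystem might hook
 * this up as its address_space readpage.  EXAMPLE_I() and example_get_extent
 * are hypothetical names standing in for the caller's inode container and
 * get_extent callback:
 *
 *	static int example_readpage(struct file *file, struct page *page)
 *	{
 *		struct extent_map_tree *tree =
 *			&EXAMPLE_I(page->mapping->host)->extent_tree;
 *		return extent_read_full_page(tree, page, example_get_extent);
 *	}
 */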
1521 * the writepage semantics are similar to regular writepage. extent
1522 * records are inserted to lock ranges in the tree, and as dirty areas
1523 * are found, they are marked writeback. Then the lock bits are removed
1524 * and the end_io handler clears the writeback ranges
1526 int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
1527 get_extent_t *get_extent,
1528 struct writeback_control *wbc)
1530 struct inode *inode = page->mapping->host;
1531 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1532 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1536 u64 last_byte = i_size_read(inode);
1539 struct extent_map *em;
1540 struct block_device *bdev;
1543 size_t page_offset = 0;
1546 loff_t i_size = i_size_read(inode);
1547 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
1551 WARN_ON(!PageLocked(page));
1552 if (page->index > end_index) {
1553 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1558 if (page->index == end_index) {
1559 size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
1560 zero_user_page(page, offset,
1561 PAGE_CACHE_SIZE - offset, KM_USER0);
1564 set_page_extent_mapped(page);
1566 lock_extent(tree, start, page_end, GFP_NOFS);
1567 nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
1571 tree->ops->fill_delalloc(inode, start, delalloc_end);
1572 if (delalloc_end >= page_end + 1) {
1573 clear_extent_bit(tree, page_end + 1, delalloc_end,
1574 EXTENT_LOCKED | EXTENT_DELALLOC,
1577 clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
1579 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1580 printk("found delalloc bits after clear extent_bit\n");
1582 } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1583 printk("found delalloc bits after find_delalloc_range returns 0\n");
1587 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1588 printk("found delalloc bits after lock_extent\n");
1591 if (last_byte <= start) {
1592 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1596 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
1597 blocksize = inode->i_sb->s_blocksize;
1599 while (cur <= end) {
1600 if (cur >= last_byte) {
1601 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
1604 em = get_extent(inode, page, page_offset, cur, end, 0);
1605 if (IS_ERR(em) || !em) {
1610 extent_offset = cur - em->start;
1611 BUG_ON(em->end < cur);
1613 iosize = min(em->end - cur, end - cur) + 1;
1614 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1615 sector = (em->block_start + extent_offset) >> 9;
1617 block_start = em->block_start;
1618 free_extent_map(em);
1621 if (block_start == EXTENT_MAP_HOLE ||
1622 block_start == EXTENT_MAP_INLINE) {
1623 clear_extent_dirty(tree, cur,
1624 cur + iosize - 1, GFP_NOFS);
1626 page_offset += iosize;
1630 /* leave this out until we have a page_mkwrite call */
1631 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
1634 page_offset += iosize;
1637 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
1638 if (tree->ops && tree->ops->writepage_io_hook) {
1639 ret = tree->ops->writepage_io_hook(page, cur,
1647 set_range_writeback(tree, cur, cur + iosize - 1);
1648 ret = submit_extent_page(WRITE, tree, page, sector,
1649 iosize, page_offset, bdev,
1650 end_bio_extent_writepage);
1655 page_offset += iosize;
1659 unlock_extent(tree, start, page_end, GFP_NOFS);
1663 EXPORT_SYMBOL(extent_write_full_page);
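/*
 * Illustrative sketch (hypothetical wrapper, mirroring the readpage example
 * above): the writepage hook simply forwards the writeback_control:
 *
 *	static int example_writepage(struct page *page,
 *				     struct writeback_control *wbc)
 *	{
 *		struct extent_map_tree *tree =
 *			&EXAMPLE_I(page->mapping->host)->extent_tree;
 *		return extent_write_full_page(tree, page, example_get_extent,
 *					      wbc);
 *	}
 */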
1666 * basic invalidatepage code: it waits on any locked or writeback
1667 * ranges corresponding to the page, and then deletes any extent state
1668 * records from the tree
1670 int extent_invalidatepage(struct extent_map_tree *tree,
1671 struct page *page, unsigned long offset)
1673 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
1674 u64 end = start + PAGE_CACHE_SIZE - 1;
1675 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
1677 start += (offset + blocksize - 1) & ~(blocksize - 1);
1681 lock_extent(tree, start, end, GFP_NOFS);
1682 wait_on_extent_writeback(tree, start, end);
1683 clear_extent_bit(tree, start, end,
1684 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
1688 EXPORT_SYMBOL(extent_invalidatepage);
1691 * simple commit_write call; set_range_dirty is used to mark both
1692 * the pages and the extent records as dirty
1694 int extent_commit_write(struct extent_map_tree *tree,
1695 struct inode *inode, struct page *page,
1696 unsigned from, unsigned to)
1698 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1700 set_page_extent_mapped(page);
1701 set_page_dirty(page);
1703 if (pos > inode->i_size) {
1704 i_size_write(inode, pos);
1705 mark_inode_dirty(inode);
1709 EXPORT_SYMBOL(extent_commit_write);
1711 int extent_prepare_write(struct extent_map_tree *tree,
1712 struct inode *inode, struct page *page,
1713 unsigned from, unsigned to, get_extent_t *get_extent)
1715 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1716 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1718 u64 orig_block_start;
1721 struct extent_map *em;
1722 unsigned blocksize = 1 << inode->i_blkbits;
1723 size_t page_offset = 0;
1724 size_t block_off_start;
1725 size_t block_off_end;
1731 set_page_extent_mapped(page);
1733 block_start = (page_start + from) & ~((u64)blocksize - 1);
1734 block_end = (page_start + to - 1) | (blocksize - 1);
1735 orig_block_start = block_start;
1737 lock_extent(tree, page_start, page_end, GFP_NOFS);
1738 while (block_start <= block_end) {
1739 em = get_extent(inode, page, page_offset, block_start,
1741 if (IS_ERR(em) || !em) {
1744 cur_end = min(block_end, em->end);
1745 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
1746 block_off_end = block_off_start + blocksize;
1747 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
1749 if (!PageUptodate(page) && isnew &&
1750 (block_off_end > to || block_off_start < from)) {
1753 kaddr = kmap_atomic(page, KM_USER0);
1754 if (block_off_end > to)
1755 memset(kaddr + to, 0, block_off_end - to);
1756 if (block_off_start < from)
1757 memset(kaddr + block_off_start, 0,
1758 from - block_off_start);
1759 flush_dcache_page(page);
1760 kunmap_atomic(kaddr, KM_USER0);
1762 if (!isnew && !PageUptodate(page) &&
1763 (block_off_end > to || block_off_start < from) &&
1764 !test_range_bit(tree, block_start, cur_end,
1765 EXTENT_UPTODATE, 1)) {
1767 u64 extent_offset = block_start - em->start;
1769 sector = (em->block_start + extent_offset) >> 9;
1770 iosize = (cur_end - block_start + blocksize - 1) &
1771 ~((u64)blocksize - 1);
1773 * we've already got the extent locked, but we
1774 * need to split the state such that our end_bio
1775 * handler can clear the lock.
1777 set_extent_bit(tree, block_start,
1778 block_start + iosize - 1,
1779 EXTENT_LOCKED, 0, NULL, GFP_NOFS);
1780 ret = submit_extent_page(READ, tree, page,
1781 sector, iosize, page_offset, em->bdev,
1782 end_bio_extent_preparewrite);
1784 block_start = block_start + iosize;
1786 set_extent_uptodate(tree, block_start, cur_end,
1788 unlock_extent(tree, block_start, cur_end, GFP_NOFS);
1789 block_start = cur_end + 1;
1791 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
1792 free_extent_map(em);
1795 wait_extent_bit(tree, orig_block_start,
1796 block_end, EXTENT_LOCKED);
1798 check_page_uptodate(tree, page);
1800 /* FIXME, zero out newly allocated blocks on error */
1803 EXPORT_SYMBOL(extent_prepare_write);
1806 * a helper for releasepage. As long as there are no locked extents
1807 * in the range corresponding to the page, both state records and extent
1808 * map records are removed
1810 int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
1812 struct extent_map *em;
1813 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1814 u64 end = start + PAGE_CACHE_SIZE - 1;
1815 u64 orig_start = start;
1818 while (start <= end) {
1819 em = lookup_extent_mapping(tree, start, end);
1820 if (!em || IS_ERR(em))
1822 if (!test_range_bit(tree, em->start, em->end,
1823 EXTENT_LOCKED, 0)) {
1824 remove_extent_mapping(tree, em);
1825 /* once for the rb tree */
1826 free_extent_map(em);
1828 start = em->end + 1;
1830 free_extent_map(em);
1832 if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
1835 clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
1839 EXPORT_SYMBOL(try_release_extent_mapping);
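/*
 * Illustrative sketch (hypothetical wrappers): releasepage and invalidatepage
 * hooks built on the helpers above.  EXAMPLE_I() is the same assumed inode
 * container as in the earlier examples; the gfp argument is simply not
 * forwarded in this sketch:
 *
 *	static int example_releasepage(struct page *page, gfp_t gfp)
 *	{
 *		struct extent_map_tree *tree =
 *			&EXAMPLE_I(page->mapping->host)->extent_tree;
 *		return try_release_extent_mapping(tree, page);
 *	}
 *
 *	static void example_invalidatepage(struct page *page,
 *					   unsigned long offset)
 *	{
 *		struct extent_map_tree *tree =
 *			&EXAMPLE_I(page->mapping->host)->extent_tree;
 *		extent_invalidatepage(tree, page, offset);
 *	}
 */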
1841 sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
1842 get_extent_t *get_extent)
1844 struct inode *inode = mapping->host;
1845 u64 start = (u64)iblock << inode->i_blkbits;
1846 u64 end = start + (1 << inode->i_blkbits) - 1;
1847 struct extent_map *em;
1849 em = get_extent(inode, NULL, 0, start, end, 0);
1850 if (!em || IS_ERR(em))
1853 // XXX(hch): block 0 is valid in some cases, e.g. XFS RT device
1854 if (em->block_start == EXTENT_MAP_INLINE ||
1855 em->block_start == EXTENT_MAP_HOLE)
1858 return (em->block_start + start - em->start) >> inode->i_blkbits;
1861 struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
1862 u64 start, unsigned long len,
1865 unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
1866 (start >> PAGE_CACHE_SHIFT) + 1;
1868 unsigned long index = start >> PAGE_CACHE_SHIFT;
1869 struct extent_buffer *eb;
1871 struct address_space *mapping = tree->mapping;
1874 eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask);
1875 if (!eb || IS_ERR(eb))
1880 atomic_set(&eb->refs, 1);
1882 for (i = 0; i < num_pages; i++, index++) {
1883 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
1887 if (!PageUptodate(p))
1892 eb->flags |= EXTENT_UPTODATE;
1895 free_extent_buffer(eb);
1898 EXPORT_SYMBOL(alloc_extent_buffer);
1900 struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
1901 u64 start, unsigned long len,
1904 unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
1905 (start >> PAGE_CACHE_SHIFT) + 1;
1907 unsigned long index = start >> PAGE_CACHE_SHIFT;
1908 struct extent_buffer *eb;
1910 struct address_space *mapping = tree->mapping;
1912 eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask);
1913 if (!eb || IS_ERR(eb))
1918 atomic_set(&eb->refs, 1);
1920 for (i = 0; i < num_pages; i++, index++) {
1921 p = find_get_page(mapping, index);
1928 free_extent_buffer(eb);
1931 EXPORT_SYMBOL(find_extent_buffer);
1933 void free_extent_buffer(struct extent_buffer *eb)
1936 unsigned long num_pages;
1941 if (!atomic_dec_and_test(&eb->refs))
1944 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
1945 (eb->start >> PAGE_CACHE_SHIFT) + 1;
1947 for (i = 0; i < num_pages; i++) {
1949 page_cache_release(eb->pages[i]);
1953 EXPORT_SYMBOL(free_extent_buffer);
1955 int clear_extent_buffer_dirty(struct extent_map_tree *tree,
1956 struct extent_buffer *eb)
1960 unsigned long num_pages;
1963 u64 start = eb->start;
1964 u64 end = start + eb->len - 1;
1966 set = clear_extent_dirty(tree, start, end, GFP_NOFS);
1967 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
1968 (eb->start >> PAGE_CACHE_SHIFT) + 1;
1970 for (i = 0; i < num_pages; i++) {
1971 page = eb->pages[i];
1974 * if we're on the last page or the first page and the
1975 * block isn't aligned on a page boundary, do extra checks
1976 * to make sure we don't clean a page that is partially dirty
1978 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
1979 ((i == num_pages - 1) &&
1980 ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
1981 start = (u64)page->index << PAGE_CACHE_SHIFT;
1982 end = start + PAGE_CACHE_SIZE - 1;
1983 if (test_range_bit(tree, start, end,
1989 clear_page_dirty_for_io(page);
1994 EXPORT_SYMBOL(clear_extent_buffer_dirty);
1996 int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
1997 struct extent_buffer *eb)
1999 return wait_on_extent_writeback(tree, eb->start,
2000 eb->start + eb->len - 1);
2002 EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
2004 int set_extent_buffer_dirty(struct extent_map_tree *tree,
2005 struct extent_buffer *eb)
2007 return set_range_dirty(tree, eb->start, eb->start + eb->len - 1);
2009 EXPORT_SYMBOL(set_extent_buffer_dirty);
2011 int set_extent_buffer_uptodate(struct extent_map_tree *tree,
2012 struct extent_buffer *eb)
2016 unsigned long num_pages;
2018 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
2019 (eb->start >> PAGE_CACHE_SHIFT) + 1;
2021 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2023 for (i = 0; i < num_pages; i++) {
2024 page = eb->pages[i];
2025 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2026 ((i == num_pages - 1) &&
2027 ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
2028 check_page_uptodate(tree, page);
2031 SetPageUptodate(page);
2035 EXPORT_SYMBOL(set_extent_buffer_uptodate);
2037 int extent_buffer_uptodate(struct extent_map_tree *tree,
2038 struct extent_buffer *eb)
2040 if (eb->flags & EXTENT_UPTODATE)
2042 return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
2043 EXTENT_UPTODATE, 1);
2045 EXPORT_SYMBOL(extent_buffer_uptodate);
2047 int read_extent_buffer_pages(struct extent_map_tree *tree,
2048 struct extent_buffer *eb, int wait)
2054 unsigned long num_pages;
2056 if (eb->flags & EXTENT_UPTODATE)
2059 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
2060 EXTENT_UPTODATE, 1)) {
2064 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
2065 (eb->start >> PAGE_CACHE_SHIFT) + 1;
2066 for (i = 0; i < num_pages; i++) {
2067 page = eb->pages[i];
2068 if (PageUptodate(page)) {
2072 if (TestSetPageLocked(page)) {
2078 if (!PageUptodate(page)) {
2079 err = page->mapping->a_ops->readpage(NULL, page);
2092 for (i = 0; i < num_pages; i++) {
2093 page = eb->pages[i];
2094 wait_on_page_locked(page);
2095 if (!PageUptodate(page)) {
2099 eb->flags |= EXTENT_UPTODATE;
2102 EXPORT_SYMBOL(read_extent_buffer_pages);
2104 void read_extent_buffer(struct extent_buffer *eb, void *dstv,
2105 unsigned long start,
2112 char *dst = (char *)dstv;
2113 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2114 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2116 WARN_ON(start > eb->len);
2117 WARN_ON(start + len > eb->start + eb->len);
2119 page = eb->pages[i];
2120 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
2122 offset += start_offset;
2125 WARN_ON(!PageUptodate(page));
2127 cur = min(len, (PAGE_CACHE_SIZE - offset));
2128 // kaddr = kmap_atomic(page, KM_USER0);
2129 kaddr = page_address(page);
2130 memcpy(dst, kaddr + offset, cur);
2131 // kunmap_atomic(kaddr, KM_USER0);
2137 page = eb->pages[i];
2140 EXPORT_SYMBOL(read_extent_buffer);
2142 int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
2143 char **token, char **map,
2144 unsigned long *map_start,
2145 unsigned long *map_len, int km)
2149 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2150 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2152 WARN_ON(start > eb->len);
2155 offset = start_offset;
2159 *map_start = (i << PAGE_CACHE_SHIFT) - offset;
2162 // kaddr = kmap_atomic(eb->pages[i], km);
2163 kaddr = page_address(eb->pages[i]);
2165 *map = kaddr + offset;
2166 *map_len = PAGE_CACHE_SIZE - offset;
2169 EXPORT_SYMBOL(map_extent_buffer);
2171 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
2173 // kunmap_atomic(token, km);
2175 EXPORT_SYMBOL(unmap_extent_buffer);
2177 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
2178 unsigned long start,
2185 char *ptr = (char *)ptrv;
2186 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2187 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2190 WARN_ON(start > eb->len);
2191 WARN_ON(start + len > eb->start + eb->len);
2193 page = eb->pages[i];
2194 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
2196 offset += start_offset;
2199 WARN_ON(!PageUptodate(page));
2201 cur = min(len, (PAGE_CACHE_SIZE - offset));
2203 // kaddr = kmap_atomic(page, KM_USER0);
2204 kaddr = page_address(page);
2205 ret = memcmp(ptr, kaddr + offset, cur);
2206 // kunmap_atomic(kaddr, KM_USER0);
2214 page = eb->pages[i];
2218 EXPORT_SYMBOL(memcmp_extent_buffer);
2220 void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
2221 unsigned long start, unsigned long len)
2227 char *src = (char *)srcv;
2228 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2229 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2231 WARN_ON(start > eb->len);
2232 WARN_ON(start + len > eb->start + eb->len);
2234 page = eb->pages[i];
2235 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
2237 offset += start_offset;
2240 WARN_ON(!PageUptodate(page));
2242 cur = min(len, PAGE_CACHE_SIZE - offset);
2243 // kaddr = kmap_atomic(page, KM_USER0);
2244 kaddr = page_address(page);
2245 memcpy(kaddr + offset, src, cur);
2246 // kunmap_atomic(kaddr, KM_USER0);
2252 page = eb->pages[i];
2255 EXPORT_SYMBOL(write_extent_buffer);
2257 void memset_extent_buffer(struct extent_buffer *eb, char c,
2258 unsigned long start, unsigned long len)
2264 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2265 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2267 WARN_ON(start > eb->len);
2268 WARN_ON(start + len > eb->start + eb->len);
2270 page = eb->pages[i];
2271 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
2273 offset += start_offset;
2276 WARN_ON(!PageUptodate(page));
2278 cur = min(len, PAGE_CACHE_SIZE - offset);
2279 // kaddr = kmap_atomic(page, KM_USER0);
2280 kaddr = page_address(page);
2281 memset(kaddr + offset, c, cur);
2282 // kunmap_atomic(kaddr, KM_USER0);
2287 page = eb->pages[i];
2290 EXPORT_SYMBOL(memset_extent_buffer);
2292 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
2293 unsigned long dst_offset, unsigned long src_offset,
2296 u64 dst_len = dst->len;
2301 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2302 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
2304 WARN_ON(src->len != dst_len);
2306 offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1);
2308 offset += start_offset;
2311 page = dst->pages[i];
2312 WARN_ON(!PageUptodate(page));
2314 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
2316 // kaddr = kmap_atomic(page, KM_USER1);
2317 kaddr = page_address(page);
2318 read_extent_buffer(src, kaddr + offset, src_offset, cur);
2319 // kunmap_atomic(kaddr, KM_USER1);
2327 EXPORT_SYMBOL(copy_extent_buffer);
2329 static void move_pages(struct page *dst_page, struct page *src_page,
2330 unsigned long dst_off, unsigned long src_off,
2333 // char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
2334 char *dst_kaddr = page_address(dst_page);
2335 if (dst_page == src_page) {
2336 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
2338 // char *src_kaddr = kmap_atomic(src_page, KM_USER1);
2339 char *src_kaddr = page_address(src_page);
2340 char *p = dst_kaddr + dst_off + len;
2341 char *s = src_kaddr + src_off + len;
2346 // kunmap_atomic(src_kaddr, KM_USER1);
2348 // kunmap_atomic(dst_kaddr, KM_USER0);
2351 static void copy_pages(struct page *dst_page, struct page *src_page,
2352 unsigned long dst_off, unsigned long src_off,
2355 //kmap_atomic(dst_page, KM_USER0);
2356 char *dst_kaddr = page_address(dst_page);
2359 if (dst_page != src_page)
2360 src_kaddr = page_address(src_page); // kmap_atomic(src_page, KM_USER1);
2362 src_kaddr = dst_kaddr;
2364 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
2366 // kunmap_atomic(dst_kaddr, KM_USER0);
2367 // if (dst_page != src_page)
2368 // 	kunmap_atomic(src_kaddr, KM_USER1);
2372 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
2373 unsigned long src_offset, unsigned long len)
2376 size_t dst_off_in_page;
2377 size_t src_off_in_page;
2378 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2379 unsigned long dst_i;
2380 unsigned long src_i;
2382 if (src_offset + len > dst->len) {
2383 printk("memcpy bogus src_offset %lu move len %lu dst len %lu\n",
2384 src_offset, len, dst->len);
2387 if (dst_offset + len > dst->len) {
2388 printk("memcpy bogus dst_offset %lu move len %lu dst len %lu\n",
2389 dst_offset, len, dst->len);
2394 dst_off_in_page = dst_offset &
2395 ((unsigned long)PAGE_CACHE_SIZE - 1);
2396 src_off_in_page = src_offset &
2397 ((unsigned long)PAGE_CACHE_SIZE - 1);
2399 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
2400 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
2403 src_off_in_page += start_offset;
2405 dst_off_in_page += start_offset;
2407 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
2409 cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE -
2412 copy_pages(dst->pages[dst_i], dst->pages[src_i],
2413 dst_off_in_page, src_off_in_page, cur);
2420 EXPORT_SYMBOL(memcpy_extent_buffer);
2422 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
2423 unsigned long src_offset, unsigned long len)
2426 size_t dst_off_in_page;
2427 size_t src_off_in_page;
2428 unsigned long dst_end = dst_offset + len - 1;
2429 unsigned long src_end = src_offset + len - 1;
2430 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2431 unsigned long dst_i;
2432 unsigned long src_i;
2434 if (src_offset + len > dst->len) {
2435 printk("memmove bogus src_offset %lu move len %lu dst len %lu\n",
2436 src_offset, len, dst->len);
2439 if (dst_offset + len > dst->len) {
2440 printk("memmove bogus dst_offset %lu move len %lu dst len %lu\n",
2441 dst_offset, len, dst->len);
2444 if (dst_offset < src_offset) {
2445 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
2449 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
2450 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
2452 dst_off_in_page = dst_end &
2453 ((unsigned long)PAGE_CACHE_SIZE - 1);
2454 src_off_in_page = src_end &
2455 ((unsigned long)PAGE_CACHE_SIZE - 1);
2458 src_off_in_page += start_offset;
2460 dst_off_in_page += start_offset;
2462 cur = min(len, src_off_in_page + 1);
2463 cur = min(cur, dst_off_in_page + 1);
2464 // printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur);
2465 move_pages(dst->pages[dst_i], dst->pages[src_i],
2466 dst_off_in_page - cur + 1,
2467 src_off_in_page - cur + 1, cur);
2474 EXPORT_SYMBOL(memmove_extent_buffer);