Btrfs: fix enospc when there is plenty of space
fs/btrfs/extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include "hash.h"
23 #include "crc32c.h"
24 #include "ctree.h"
25 #include "disk-io.h"
26 #include "print-tree.h"
27 #include "transaction.h"
28 #include "volumes.h"
29 #include "locking.h"
30 #include "ref-cache.h"
31
32 #define PENDING_EXTENT_INSERT 0
33 #define PENDING_EXTENT_DELETE 1
34 #define PENDING_BACKREF_UPDATE 2
35
36 struct pending_extent_op {
37         int type;
38         u64 bytenr;
39         u64 num_bytes;
40         u64 parent;
41         u64 orig_parent;
42         u64 generation;
43         u64 orig_generation;
44         int level;
45 };
46
47 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
48                                  btrfs_root *extent_root);
49 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
50                                btrfs_root *extent_root);
51 static struct btrfs_block_group_cache *
52 __btrfs_find_block_group(struct btrfs_root *root,
53                          struct btrfs_block_group_cache *hint,
54                          u64 search_start, int data, int owner);
55
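/*
 * take or drop fs_info->alloc_mutex for roots other than the extent, chunk
 * and dev roots; callers working on those roots are expected to already be
 * holding the mutex.
 */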
56 void maybe_lock_mutex(struct btrfs_root *root)
57 {
58         if (root != root->fs_info->extent_root &&
59             root != root->fs_info->chunk_root &&
60             root != root->fs_info->dev_root) {
61                 mutex_lock(&root->fs_info->alloc_mutex);
62         }
63 }
64
65 void maybe_unlock_mutex(struct btrfs_root *root)
66 {
67         if (root != root->fs_info->extent_root &&
68             root != root->fs_info->chunk_root &&
69             root != root->fs_info->dev_root) {
70                 mutex_unlock(&root->fs_info->alloc_mutex);
71         }
72 }
73
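/* return non-zero if the block group's flags contain all of the given bits */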
74 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
75 {
76         return (cache->flags & bits) == bits;
77 }
78
79 /*
80  * this adds the block group to the fs_info rb tree for the block group
81  * cache
82  */
83 int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
84                                 struct btrfs_block_group_cache *block_group)
85 {
86         struct rb_node **p;
87         struct rb_node *parent = NULL;
88         struct btrfs_block_group_cache *cache;
89
90         spin_lock(&info->block_group_cache_lock);
91         p = &info->block_group_cache_tree.rb_node;
92
93         while (*p) {
94                 parent = *p;
95                 cache = rb_entry(parent, struct btrfs_block_group_cache,
96                                  cache_node);
97                 if (block_group->key.objectid < cache->key.objectid) {
98                         p = &(*p)->rb_left;
99                 } else if (block_group->key.objectid > cache->key.objectid) {
100                         p = &(*p)->rb_right;
101                 } else {
102                         spin_unlock(&info->block_group_cache_lock);
103                         return -EEXIST;
104                 }
105         }
106
107         rb_link_node(&block_group->cache_node, parent, p);
108         rb_insert_color(&block_group->cache_node,
109                         &info->block_group_cache_tree);
110         spin_unlock(&info->block_group_cache_lock);
111
112         return 0;
113 }
114
115 /*
116  * This will return the block group at or after bytenr if contains is 0, else
117  * it will return the block group that contains the bytenr
118  */
119 static struct btrfs_block_group_cache *
120 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
121                               int contains)
122 {
123         struct btrfs_block_group_cache *cache, *ret = NULL;
124         struct rb_node *n;
125         u64 end, start;
126
127         spin_lock(&info->block_group_cache_lock);
128         n = info->block_group_cache_tree.rb_node;
129
130         while (n) {
131                 cache = rb_entry(n, struct btrfs_block_group_cache,
132                                  cache_node);
133                 end = cache->key.objectid + cache->key.offset - 1;
134                 start = cache->key.objectid;
135
136                 if (bytenr < start) {
137                         if (!contains && (!ret || start < ret->key.objectid))
138                                 ret = cache;
139                         n = n->rb_left;
140                 } else if (bytenr > start) {
141                         if (contains && bytenr <= end) {
142                                 ret = cache;
143                                 break;
144                         }
145                         n = n->rb_right;
146                 } else {
147                         ret = cache;
148                         break;
149                 }
150         }
151         spin_unlock(&info->block_group_cache_lock);
152
153         return ret;
154 }
155
156 /*
157  * this is only called by cache_block_group.  Since we could have freed extents,
158  * we need to check the pinned_extents for any extents that can't be used yet,
159  * since their free space will be released as soon as the transaction commits.
160  */
161 static int add_new_free_space(struct btrfs_block_group_cache *block_group,
162                               struct btrfs_fs_info *info, u64 start, u64 end)
163 {
164         u64 extent_start, extent_end, size;
165         int ret;
166
167         while (start < end) {
168                 ret = find_first_extent_bit(&info->pinned_extents, start,
169                                             &extent_start, &extent_end,
170                                             EXTENT_DIRTY);
171                 if (ret)
172                         break;
173
174                 if (extent_start == start) {
175                         start = extent_end + 1;
176                 } else if (extent_start > start && extent_start < end) {
177                         size = extent_start - start;
178                         ret = btrfs_add_free_space(block_group, start, size);
179                         BUG_ON(ret);
180                         start = extent_end + 1;
181                 } else {
182                         break;
183                 }
184         }
185
186         if (start < end) {
187                 size = end - start;
188                 ret = btrfs_add_free_space(block_group, start, size);
189                 BUG_ON(ret);
190         }
191
192         return 0;
193 }
194
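/*
 * walk the extent tree for this block group and record the gaps between
 * allocated extents as free space.
 */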
195 static int cache_block_group(struct btrfs_root *root,
196                              struct btrfs_block_group_cache *block_group)
197 {
198         struct btrfs_path *path;
199         int ret = 0;
200         struct btrfs_key key;
201         struct extent_buffer *leaf;
202         int slot;
203         u64 last = 0;
204         u64 first_free;
205         int found = 0;
206
207         if (!block_group)
208                 return 0;
209
210         root = root->fs_info->extent_root;
211
212         if (block_group->cached)
213                 return 0;
214
215         path = btrfs_alloc_path();
216         if (!path)
217                 return -ENOMEM;
218
219         path->reada = 2;
220         /*
221          * we get into deadlocks with paths held by callers of this function.
222          * since the alloc_mutex is protecting things right now, just
223          * skip the locking here
224          */
225         path->skip_locking = 1;
226         first_free = max_t(u64, block_group->key.objectid,
227                            BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
228         key.objectid = block_group->key.objectid;
229         key.offset = 0;
230         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
231         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
232         if (ret < 0)
233                 goto err;
234         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
235         if (ret < 0)
236                 goto err;
237         if (ret == 0) {
238                 leaf = path->nodes[0];
239                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
240                 if (key.objectid + key.offset > first_free)
241                         first_free = key.objectid + key.offset;
242         }
243         while(1) {
244                 leaf = path->nodes[0];
245                 slot = path->slots[0];
246                 if (slot >= btrfs_header_nritems(leaf)) {
247                         ret = btrfs_next_leaf(root, path);
248                         if (ret < 0)
249                                 goto err;
250                         if (ret == 0)
251                                 continue;
252                         else
253                                 break;
254                 }
255                 btrfs_item_key_to_cpu(leaf, &key, slot);
256                 if (key.objectid < block_group->key.objectid)
257                         goto next;
258
259                 if (key.objectid >= block_group->key.objectid +
260                     block_group->key.offset)
261                         break;
262
263                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
264                         if (!found) {
265                                 last = first_free;
266                                 found = 1;
267                         }
268
269                         add_new_free_space(block_group, root->fs_info, last,
270                                            key.objectid);
271
272                         last = key.objectid + key.offset;
273                 }
274 next:
275                 path->slots[0]++;
276         }
277
278         if (!found)
279                 last = first_free;
280
281         add_new_free_space(block_group, root->fs_info, last,
282                            block_group->key.objectid +
283                            block_group->key.offset);
284
285         block_group->cached = 1;
286         ret = 0;
287 err:
288         btrfs_free_path(path);
289         return ret;
290 }
291
292 /*
293  * return the block group that starts at or after bytenr
294  */
295 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
296                                                        btrfs_fs_info *info,
297                                                          u64 bytenr)
298 {
299         struct btrfs_block_group_cache *cache;
300
301         cache = block_group_cache_tree_search(info, bytenr, 0);
302
303         return cache;
304 }
305
306 /*
307  * return the block group that contains the given bytenr
308  */
309 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
310                                                          btrfs_fs_info *info,
311                                                          u64 bytenr)
312 {
313         struct btrfs_block_group_cache *cache;
314
315         cache = block_group_cache_tree_search(info, bytenr, 1);
316
317         return cache;
318 }
319
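/* find the space_info whose flags exactly match the given flags */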
320 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
321                                                   u64 flags)
322 {
323         struct list_head *head = &info->space_info;
324         struct list_head *cur;
325         struct btrfs_space_info *found;
326         list_for_each(cur, head) {
327                 found = list_entry(cur, struct btrfs_space_info, list);
328                 if (found->flags == flags)
329                         return found;
330         }
331         return NULL;
332 }
333
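/* return num scaled by factor tenths (num * factor / 10) */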
334 static u64 div_factor(u64 num, int factor)
335 {
336         if (factor == 10)
337                 return num;
338         num *= factor;
339         do_div(num, 10);
340         return num;
341 }
342
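/*
 * find a block group with the right profile that is still below the fullness
 * threshold, trying the search_start and hint groups before scanning all
 * groups, and retrying with a relaxed threshold if needed.
 */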
343 static struct btrfs_block_group_cache *
344 __btrfs_find_block_group(struct btrfs_root *root,
345                          struct btrfs_block_group_cache *hint,
346                          u64 search_start, int data, int owner)
347 {
348         struct btrfs_block_group_cache *cache;
349         struct btrfs_block_group_cache *found_group = NULL;
350         struct btrfs_fs_info *info = root->fs_info;
351         u64 used;
352         u64 last = 0;
353         u64 free_check;
354         int full_search = 0;
355         int factor = 10;
356         int wrapped = 0;
357
358         if (data & BTRFS_BLOCK_GROUP_METADATA)
359                 factor = 9;
360
361         if (search_start) {
362                 struct btrfs_block_group_cache *shint;
363                 shint = btrfs_lookup_first_block_group(info, search_start);
364                 if (shint && block_group_bits(shint, data) && !shint->ro) {
365                         spin_lock(&shint->lock);
366                         used = btrfs_block_group_used(&shint->item);
367                         if (used + shint->pinned + shint->reserved <
368                             div_factor(shint->key.offset, factor)) {
369                                 spin_unlock(&shint->lock);
370                                 return shint;
371                         }
372                         spin_unlock(&shint->lock);
373                 }
374         }
375         if (hint && !hint->ro && block_group_bits(hint, data)) {
376                 spin_lock(&hint->lock);
377                 used = btrfs_block_group_used(&hint->item);
378                 if (used + hint->pinned + hint->reserved <
379                     div_factor(hint->key.offset, factor)) {
380                         spin_unlock(&hint->lock);
381                         return hint;
382                 }
383                 spin_unlock(&hint->lock);
384                 last = hint->key.objectid + hint->key.offset;
385         } else {
386                 if (hint)
387                         last = max(hint->key.objectid, search_start);
388                 else
389                         last = search_start;
390         }
391 again:
392         while (1) {
393                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
394                 if (!cache)
395                         break;
396
397                 spin_lock(&cache->lock);
398                 last = cache->key.objectid + cache->key.offset;
399                 used = btrfs_block_group_used(&cache->item);
400
401                 if (!cache->ro && block_group_bits(cache, data)) {
402                         free_check = div_factor(cache->key.offset, factor);
403                         if (used + cache->pinned + cache->reserved <
404                             free_check) {
405                                 found_group = cache;
406                                 spin_unlock(&cache->lock);
407                                 goto found;
408                         }
409                 }
410                 spin_unlock(&cache->lock);
411                 cond_resched();
412         }
413         if (!wrapped) {
414                 last = search_start;
415                 wrapped = 1;
416                 goto again;
417         }
418         if (!full_search && factor < 10) {
419                 last = search_start;
420                 full_search = 1;
421                 factor = 10;
422                 goto again;
423         }
424 found:
425         return found_group;
426 }
427
428 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
429                                                  struct btrfs_block_group_cache
430                                                  *hint, u64 search_start,
431                                                  int data, int owner)
432 {
433
434         struct btrfs_block_group_cache *ret;
435         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
436         return ret;
437 }
438
439 /* simple helper to search for an existing extent at a given offset */
440 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
441 {
442         int ret;
443         struct btrfs_key key;
444         struct btrfs_path *path;
445
446         path = btrfs_alloc_path();
447         BUG_ON(!path);
448         maybe_lock_mutex(root);
449         key.objectid = start;
450         key.offset = len;
451         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
452         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
453                                 0, 0);
454         maybe_unlock_mutex(root);
455         btrfs_free_path(path);
456         return ret;
457 }
458
459 /*
460  * Back reference rules.  Back refs have three main goals:
461  *
462  * 1) differentiate between all holders of references to an extent so that
463  *    when a reference is dropped we can make sure it was a valid reference
464  *    before freeing the extent.
465  *
466  * 2) Provide enough information to quickly find the holders of an extent
467  *    if we notice a given block is corrupted or bad.
468  *
469  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
470  *    maintenance.  This is actually the same as #2, but with a slightly
471  *    different use case.
472  *
473  * File extents can be referenced by:
474  *
475  * - multiple snapshots, subvolumes, or different generations in one subvol
476  * - different files inside a single subvolume
477  * - different offsets inside a file (bookend extents in file.c)
478  *
479  * The extent ref structure has fields for:
480  *
481  * - Objectid of the subvolume root
482  * - Generation number of the tree holding the reference
483  * - objectid of the file holding the reference
484  * - number of references held by the parent node (always 1 for tree blocks)
485  *
486  * A btree leaf may hold multiple references to a file extent. In most cases,
487  * these references are from the same file and the corresponding offsets inside
488  * the file are close together.
489  *
490  * When a file extent is allocated the fields are filled in:
491  *     (root_key.objectid, trans->transid, inode objectid, 1)
492  *
493  * When a leaf is cow'd, new references are added for every file extent found
494  * in the leaf.  It looks similar to the create case, but trans->transid will
495  * be different when the block is cow'd.
496  *
497  *     (root_key.objectid, trans->transid, inode objectid,
498  *      number of references in the leaf)
499  *
500  * When a file extent is removed either during snapshot deletion or
501  * file truncation, we find the corresponding back reference and check
502  * the following fields:
503  *
504  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
505  *      inode objectid)
506  *
507  * Btree extents can be referenced by:
508  *
509  * - Different subvolumes
510  * - Different generations of the same subvolume
511  *
512  * When a tree block is created, back references are inserted:
513  *
514  * (root->root_key.objectid, trans->transid, level, 1)
515  *
516  * When a tree block is cow'd, new back references are added for all the
517  * blocks it points to. If the tree block isn't in a reference counted root,
518  * the old back references are removed. These new back references are of
519  * the form (trans->transid will have increased since creation):
520  *
521  * (root->root_key.objectid, trans->transid, level, 1)
522  *
523  * When a backref is being deleted, the following fields are checked:
524  *
525  * if backref was for a tree root:
526  *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
527  * else
528  *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
529  *
530  * Back Reference Key composing:
531  *
532  * The key objectid corresponds to the first byte in the extent, the key
533  * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
534  * byte of the parent extent. If an extent is a tree root, the key offset
535  * is set to the key objectid.
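 *
 * For example, an extent starting at bytenr X that is referenced from a
 * parent block at bytenr P has the backref key (X, BTRFS_EXTENT_REF_KEY, P);
 * if the extent is itself a tree root, the key is (X, BTRFS_EXTENT_REF_KEY, X).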
536  */
537
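/*
 * find the backref item for (bytenr, parent) and check that its root,
 * generation and owner match what the caller expects; 'del' prepares the
 * path for deleting the item.
 */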
538 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
539                                           struct btrfs_root *root,
540                                           struct btrfs_path *path,
541                                           u64 bytenr, u64 parent,
542                                           u64 ref_root, u64 ref_generation,
543                                           u64 owner_objectid, int del)
544 {
545         struct btrfs_key key;
546         struct btrfs_extent_ref *ref;
547         struct extent_buffer *leaf;
548         u64 ref_objectid;
549         int ret;
550
551         key.objectid = bytenr;
552         key.type = BTRFS_EXTENT_REF_KEY;
553         key.offset = parent;
554
555         ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
556         if (ret < 0)
557                 goto out;
558         if (ret > 0) {
559                 ret = -ENOENT;
560                 goto out;
561         }
562
563         leaf = path->nodes[0];
564         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
565         ref_objectid = btrfs_ref_objectid(leaf, ref);
566         if (btrfs_ref_root(leaf, ref) != ref_root ||
567             btrfs_ref_generation(leaf, ref) != ref_generation ||
568             (ref_objectid != owner_objectid &&
569              ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
570                 ret = -EIO;
571                 WARN_ON(1);
572                 goto out;
573         }
574         ret = 0;
575 out:
576         return ret;
577 }
578
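/*
 * insert a backref item for (bytenr, parent), or bump the ref count on an
 * existing one; refs from different owners collapse to
 * BTRFS_MULTIPLE_OBJECTIDS.
 */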
579 static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
580                                           struct btrfs_root *root,
581                                           struct btrfs_path *path,
582                                           u64 bytenr, u64 parent,
583                                           u64 ref_root, u64 ref_generation,
584                                           u64 owner_objectid)
585 {
586         struct btrfs_key key;
587         struct extent_buffer *leaf;
588         struct btrfs_extent_ref *ref;
589         u32 num_refs;
590         int ret;
591
592         key.objectid = bytenr;
593         key.type = BTRFS_EXTENT_REF_KEY;
594         key.offset = parent;
595
596         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
597         if (ret == 0) {
598                 leaf = path->nodes[0];
599                 ref = btrfs_item_ptr(leaf, path->slots[0],
600                                      struct btrfs_extent_ref);
601                 btrfs_set_ref_root(leaf, ref, ref_root);
602                 btrfs_set_ref_generation(leaf, ref, ref_generation);
603                 btrfs_set_ref_objectid(leaf, ref, owner_objectid);
604                 btrfs_set_ref_num_refs(leaf, ref, 1);
605         } else if (ret == -EEXIST) {
606                 u64 existing_owner;
607                 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
608                 leaf = path->nodes[0];
609                 ref = btrfs_item_ptr(leaf, path->slots[0],
610                                      struct btrfs_extent_ref);
611                 if (btrfs_ref_root(leaf, ref) != ref_root ||
612                     btrfs_ref_generation(leaf, ref) != ref_generation) {
613                         ret = -EIO;
614                         WARN_ON(1);
615                         goto out;
616                 }
617
618                 num_refs = btrfs_ref_num_refs(leaf, ref);
619                 BUG_ON(num_refs == 0);
620                 btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);
621
622                 existing_owner = btrfs_ref_objectid(leaf, ref);
623                 if (existing_owner != owner_objectid &&
624                     existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
625                         btrfs_set_ref_objectid(leaf, ref,
626                                         BTRFS_MULTIPLE_OBJECTIDS);
627                 }
628                 ret = 0;
629         } else {
630                 goto out;
631         }
632         btrfs_mark_buffer_dirty(path->nodes[0]);
633 out:
634         btrfs_release_path(root, path);
635         return ret;
636 }
637
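/*
 * drop one reference from the backref item at the current path position,
 * deleting the item when the count reaches zero.
 */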
638 static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
639                                           struct btrfs_root *root,
640                                           struct btrfs_path *path)
641 {
642         struct extent_buffer *leaf;
643         struct btrfs_extent_ref *ref;
644         u32 num_refs;
645         int ret = 0;
646
647         leaf = path->nodes[0];
648         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
649         num_refs = btrfs_ref_num_refs(leaf, ref);
650         BUG_ON(num_refs == 0);
651         num_refs -= 1;
652         if (num_refs == 0) {
653                 ret = btrfs_del_item(trans, root, path);
654         } else {
655                 btrfs_set_ref_num_refs(leaf, ref, num_refs);
656                 btrfs_mark_buffer_dirty(leaf);
657         }
658         btrfs_release_path(root, path);
659         return ret;
660 }
661
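/*
 * move a backref from its original parent/root/generation to the new ones.
 * For the extent root the update is queued as a pending extent op; for
 * everything else the old backref is removed and a new one inserted.
 */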
662 static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
663                                      struct btrfs_root *root, u64 bytenr,
664                                      u64 orig_parent, u64 parent,
665                                      u64 orig_root, u64 ref_root,
666                                      u64 orig_generation, u64 ref_generation,
667                                      u64 owner_objectid)
668 {
669         int ret;
670         struct btrfs_root *extent_root = root->fs_info->extent_root;
671         struct btrfs_path *path;
672
673         if (root == root->fs_info->extent_root) {
674                 struct pending_extent_op *extent_op;
675                 u64 num_bytes;
676
677                 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
678                 num_bytes = btrfs_level_size(root, (int)owner_objectid);
679                 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
680                                 bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) {
681                         u64 priv;
682                         ret = get_state_private(&root->fs_info->extent_ins,
683                                                 bytenr, &priv);
684                         BUG_ON(ret);
685                         extent_op = (struct pending_extent_op *)
686                                                         (unsigned long)priv;
687                         BUG_ON(extent_op->parent != orig_parent);
688                         BUG_ON(extent_op->generation != orig_generation);
689                         extent_op->parent = parent;
690                         extent_op->generation = ref_generation;
691                 } else {
692                         extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
693                         BUG_ON(!extent_op);
694
695                         extent_op->type = PENDING_BACKREF_UPDATE;
696                         extent_op->bytenr = bytenr;
697                         extent_op->num_bytes = num_bytes;
698                         extent_op->parent = parent;
699                         extent_op->orig_parent = orig_parent;
700                         extent_op->generation = ref_generation;
701                         extent_op->orig_generation = orig_generation;
702                         extent_op->level = (int)owner_objectid;
703
704                         set_extent_bits(&root->fs_info->extent_ins,
705                                         bytenr, bytenr + num_bytes - 1,
706                                         EXTENT_LOCKED, GFP_NOFS);
707                         set_state_private(&root->fs_info->extent_ins,
708                                           bytenr, (unsigned long)extent_op);
709                 }
710                 return 0;
711         }
712
713         path = btrfs_alloc_path();
714         if (!path)
715                 return -ENOMEM;
716         ret = lookup_extent_backref(trans, extent_root, path,
717                                     bytenr, orig_parent, orig_root,
718                                     orig_generation, owner_objectid, 1);
719         if (ret)
720                 goto out;
721         ret = remove_extent_backref(trans, extent_root, path);
722         if (ret)
723                 goto out;
724         ret = insert_extent_backref(trans, extent_root, path, bytenr,
725                                     parent, ref_root, ref_generation,
726                                     owner_objectid);
727         BUG_ON(ret);
728         finish_current_insert(trans, extent_root);
729         del_pending_extents(trans, extent_root);
730 out:
731         btrfs_free_path(path);
732         return ret;
733 }
734
735 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
736                             struct btrfs_root *root, u64 bytenr,
737                             u64 orig_parent, u64 parent,
738                             u64 ref_root, u64 ref_generation,
739                             u64 owner_objectid)
740 {
741         int ret;
742         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
743             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
744                 return 0;
745         maybe_lock_mutex(root);
746         ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
747                                         parent, ref_root, ref_root,
748                                         ref_generation, ref_generation,
749                                         owner_objectid);
750         maybe_unlock_mutex(root);
751         return ret;
752 }
753
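/*
 * bump the reference count in the extent item for bytenr and insert a
 * backref recording the new holder.
 */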
754 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
755                                   struct btrfs_root *root, u64 bytenr,
756                                   u64 orig_parent, u64 parent,
757                                   u64 orig_root, u64 ref_root,
758                                   u64 orig_generation, u64 ref_generation,
759                                   u64 owner_objectid)
760 {
761         struct btrfs_path *path;
762         int ret;
763         struct btrfs_key key;
764         struct extent_buffer *l;
765         struct btrfs_extent_item *item;
766         u32 refs;
767
768         path = btrfs_alloc_path();
769         if (!path)
770                 return -ENOMEM;
771
772         path->reada = 1;
773         key.objectid = bytenr;
774         key.type = BTRFS_EXTENT_ITEM_KEY;
775         key.offset = (u64)-1;
776
777         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
778                                 0, 1);
779         if (ret < 0)
780                 return ret;
781         BUG_ON(ret == 0 || path->slots[0] == 0);
782
783         path->slots[0]--;
784         l = path->nodes[0];
785
786         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
787         BUG_ON(key.objectid != bytenr);
788         BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
789
790         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
791         refs = btrfs_extent_refs(l, item);
792         btrfs_set_extent_refs(l, item, refs + 1);
793         btrfs_mark_buffer_dirty(path->nodes[0]);
794
795         btrfs_release_path(root->fs_info->extent_root, path);
796
797         path->reada = 1;
798         ret = insert_extent_backref(trans, root->fs_info->extent_root,
799                                     path, bytenr, parent,
800                                     ref_root, ref_generation,
801                                     owner_objectid);
802         BUG_ON(ret);
803         finish_current_insert(trans, root->fs_info->extent_root);
804         del_pending_extents(trans, root->fs_info->extent_root);
805
806         btrfs_free_path(path);
807         return 0;
808 }
809
810 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
811                          struct btrfs_root *root,
812                          u64 bytenr, u64 num_bytes, u64 parent,
813                          u64 ref_root, u64 ref_generation,
814                          u64 owner_objectid)
815 {
816         int ret;
817         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
818             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
819                 return 0;
820         maybe_lock_mutex(root);
821         ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
822                                      0, ref_root, 0, ref_generation,
823                                      owner_objectid);
824         maybe_unlock_mutex(root);
825         return ret;
826 }
827
828 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
829                          struct btrfs_root *root)
830 {
831         finish_current_insert(trans, root->fs_info->extent_root);
832         del_pending_extents(trans, root->fs_info->extent_root);
833         return 0;
834 }
835
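/* read the current reference count from the extent item for bytenr */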
836 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
837                             struct btrfs_root *root, u64 bytenr,
838                             u64 num_bytes, u32 *refs)
839 {
840         struct btrfs_path *path;
841         int ret;
842         struct btrfs_key key;
843         struct extent_buffer *l;
844         struct btrfs_extent_item *item;
845
846         WARN_ON(num_bytes < root->sectorsize);
847         path = btrfs_alloc_path();
848         path->reada = 1;
849         key.objectid = bytenr;
850         key.offset = num_bytes;
851         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
852         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
853                                 0, 0);
854         if (ret < 0)
855                 goto out;
856         if (ret != 0) {
857                 btrfs_print_leaf(root, path->nodes[0]);
858                 printk("failed to find block number %Lu\n", bytenr);
859                 BUG();
860         }
861         l = path->nodes[0];
862         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
863         *refs = btrfs_extent_refs(l, item);
864 out:
865         btrfs_free_path(path);
866         return 0;
867 }
868
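/*
 * scan the backrefs for bytenr, returning via *ref_count whether this root
 * holds the only reference (1) or others exist (2), and via *min_generation
 * the oldest generation seen among this root's refs.
 */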
869 static int get_reference_status(struct btrfs_root *root, u64 bytenr,
870                                 u64 parent_gen, u64 ref_objectid,
871                                 u64 *min_generation, u32 *ref_count)
872 {
873         struct btrfs_root *extent_root = root->fs_info->extent_root;
874         struct btrfs_path *path;
875         struct extent_buffer *leaf;
876         struct btrfs_extent_ref *ref_item;
877         struct btrfs_key key;
878         struct btrfs_key found_key;
879         u64 root_objectid = root->root_key.objectid;
880         u64 ref_generation;
881         u32 nritems;
882         int ret;
883
884         key.objectid = bytenr;
885         key.offset = (u64)-1;
886         key.type = BTRFS_EXTENT_ITEM_KEY;
887
888         path = btrfs_alloc_path();
889         mutex_lock(&root->fs_info->alloc_mutex);
890         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
891         if (ret < 0)
892                 goto out;
893         BUG_ON(ret == 0);
894         if (ret < 0 || path->slots[0] == 0)
895                 goto out;
896
897         path->slots[0]--;
898         leaf = path->nodes[0];
899         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
900
901         if (found_key.objectid != bytenr ||
902             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
903                 ret = 1;
904                 goto out;
905         }
906
907         *ref_count = 0;
908         *min_generation = (u64)-1;
909
910         while (1) {
911                 leaf = path->nodes[0];
912                 nritems = btrfs_header_nritems(leaf);
913                 if (path->slots[0] >= nritems) {
914                         ret = btrfs_next_leaf(extent_root, path);
915                         if (ret < 0)
916                                 goto out;
917                         if (ret == 0)
918                                 continue;
919                         break;
920                 }
921                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
922                 if (found_key.objectid != bytenr)
923                         break;
924
925                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
926                         path->slots[0]++;
927                         continue;
928                 }
929
930                 ref_item = btrfs_item_ptr(leaf, path->slots[0],
931                                           struct btrfs_extent_ref);
932                 ref_generation = btrfs_ref_generation(leaf, ref_item);
933                 /*
934                  * For (parent_gen > 0 && parent_gen > ref_generation):
935                  *
936                  * we reach here through the oldest root, therefore
937                  * all other references from the same snapshot should have
938                  * a larger generation.
939                  */
940                 if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
941                     (parent_gen > 0 && parent_gen > ref_generation) ||
942                     (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
943                      ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
944                         *ref_count = 2;
945                         break;
946                 }
947
948                 *ref_count = 1;
949                 if (*min_generation > ref_generation)
950                         *min_generation = ref_generation;
951
952                 path->slots[0]++;
953         }
954         ret = 0;
955 out:
956         mutex_unlock(&root->fs_info->alloc_mutex);
957         btrfs_free_path(path);
958         return ret;
959 }
960
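/*
 * return 1 if the file extent at bytenr may also be referenced by another
 * snapshot, 0 if this root holds the only reference.
 */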
961 int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
962                            struct btrfs_root *root,
963                            struct btrfs_key *key, u64 bytenr)
964 {
965         struct btrfs_root *old_root;
966         struct btrfs_path *path = NULL;
967         struct extent_buffer *eb;
968         struct btrfs_file_extent_item *item;
969         u64 ref_generation;
970         u64 min_generation;
971         u64 extent_start;
972         u32 ref_count;
973         int level;
974         int ret;
975
976         BUG_ON(trans == NULL);
977         BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
978         ret = get_reference_status(root, bytenr, 0, key->objectid,
979                                    &min_generation, &ref_count);
980         if (ret)
981                 return ret;
982
983         if (ref_count != 1)
984                 return 1;
985
986         old_root = root->dirty_root->root;
987         ref_generation = old_root->root_key.offset;
988
989         /* all references are created in the running transaction */
990         if (min_generation > ref_generation) {
991                 ret = 0;
992                 goto out;
993         }
994
995         path = btrfs_alloc_path();
996         if (!path) {
997                 ret = -ENOMEM;
998                 goto out;
999         }
1000
1001         path->skip_locking = 1;
1002         /* if no item is found, the extent is referenced by another snapshot */
1003         ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
1004         if (ret)
1005                 goto out;
1006
1007         eb = path->nodes[0];
1008         item = btrfs_item_ptr(eb, path->slots[0],
1009                               struct btrfs_file_extent_item);
1010         if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
1011             btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
1012                 ret = 1;
1013                 goto out;
1014         }
1015
1016         for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
1017                 if (level >= 0) {
1018                         eb = path->nodes[level];
1019                         if (!eb)
1020                                 continue;
1021                         extent_start = eb->start;
1022                 } else
1023                         extent_start = bytenr;
1024
1025                 ret = get_reference_status(root, extent_start, ref_generation,
1026                                            0, &min_generation, &ref_count);
1027                 if (ret)
1028                         goto out;
1029
1030                 if (ref_count != 1) {
1031                         ret = 1;
1032                         goto out;
1033                 }
1034                 if (level >= 0)
1035                         ref_generation = btrfs_header_generation(eb);
1036         }
1037         ret = 0;
1038 out:
1039         if (path)
1040                 btrfs_free_path(path);
1041         return ret;
1042 }
1043
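/*
 * for a leaf, build a leaf ref cache entry recording the data extents it
 * references.
 */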
1044 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1045                     struct extent_buffer *buf, u32 nr_extents)
1046 {
1047         struct btrfs_key key;
1048         struct btrfs_file_extent_item *fi;
1049         u64 root_gen;
1050         u32 nritems;
1051         int i;
1052         int level;
1053         int ret = 0;
1054         int shared = 0;
1055
1056         if (!root->ref_cows)
1057                 return 0;
1058
1059         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1060                 shared = 0;
1061                 root_gen = root->root_key.offset;
1062         } else {
1063                 shared = 1;
1064                 root_gen = trans->transid - 1;
1065         }
1066
1067         level = btrfs_header_level(buf);
1068         nritems = btrfs_header_nritems(buf);
1069
1070         if (level == 0) {
1071                 struct btrfs_leaf_ref *ref;
1072                 struct btrfs_extent_info *info;
1073
1074                 ref = btrfs_alloc_leaf_ref(root, nr_extents);
1075                 if (!ref) {
1076                         ret = -ENOMEM;
1077                         goto out;
1078                 }
1079
1080                 ref->root_gen = root_gen;
1081                 ref->bytenr = buf->start;
1082                 ref->owner = btrfs_header_owner(buf);
1083                 ref->generation = btrfs_header_generation(buf);
1084                 ref->nritems = nr_extents;
1085                 info = ref->extents;
1086
1087                 for (i = 0; nr_extents > 0 && i < nritems; i++) {
1088                         u64 disk_bytenr;
1089                         btrfs_item_key_to_cpu(buf, &key, i);
1090                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1091                                 continue;
1092                         fi = btrfs_item_ptr(buf, i,
1093                                             struct btrfs_file_extent_item);
1094                         if (btrfs_file_extent_type(buf, fi) ==
1095                             BTRFS_FILE_EXTENT_INLINE)
1096                                 continue;
1097                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1098                         if (disk_bytenr == 0)
1099                                 continue;
1100
1101                         info->bytenr = disk_bytenr;
1102                         info->num_bytes =
1103                                 btrfs_file_extent_disk_num_bytes(buf, fi);
1104                         info->objectid = key.objectid;
1105                         info->offset = key.offset;
1106                         info++;
1107                 }
1108
1109                 ret = btrfs_add_leaf_ref(root, ref, shared);
1110                 if (ret == -EEXIST && shared) {
1111                         struct btrfs_leaf_ref *old;
1112                         old = btrfs_lookup_leaf_ref(root, ref->bytenr);
1113                         BUG_ON(!old);
1114                         btrfs_remove_leaf_ref(root, old);
1115                         btrfs_free_leaf_ref(root, old);
1116                         ret = btrfs_add_leaf_ref(root, ref, shared);
1117                 }
1118                 WARN_ON(ret);
1119                 btrfs_free_leaf_ref(root, ref);
1120         }
1121 out:
1122         return ret;
1123 }
1124
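/*
 * after a cow, add or update backrefs for everything buf points to, using
 * orig_buf as the old parent; the number of file extents or node pointers
 * processed is returned in *nr_extents.
 */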
1125 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1126                   struct extent_buffer *orig_buf, struct extent_buffer *buf,
1127                   u32 *nr_extents)
1128 {
1129         u64 bytenr;
1130         u64 ref_root;
1131         u64 orig_root;
1132         u64 ref_generation;
1133         u64 orig_generation;
1134         u32 nritems;
1135         u32 nr_file_extents = 0;
1136         struct btrfs_key key;
1137         struct btrfs_file_extent_item *fi;
1138         int i;
1139         int level;
1140         int ret = 0;
1141         int faili = 0;
1142         int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1143                             u64, u64, u64, u64, u64, u64, u64, u64);
1144
1145         ref_root = btrfs_header_owner(buf);
1146         ref_generation = btrfs_header_generation(buf);
1147         orig_root = btrfs_header_owner(orig_buf);
1148         orig_generation = btrfs_header_generation(orig_buf);
1149
1150         nritems = btrfs_header_nritems(buf);
1151         level = btrfs_header_level(buf);
1152
1153         if (root->ref_cows) {
1154                 process_func = __btrfs_inc_extent_ref;
1155         } else {
1156                 if (level == 0 &&
1157                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1158                         goto out;
1159                 if (level != 0 &&
1160                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1161                         goto out;
1162                 process_func = __btrfs_update_extent_ref;
1163         }
1164
1165         for (i = 0; i < nritems; i++) {
1166                 cond_resched();
1167                 if (level == 0) {
1168                         btrfs_item_key_to_cpu(buf, &key, i);
1169                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1170                                 continue;
1171                         fi = btrfs_item_ptr(buf, i,
1172                                             struct btrfs_file_extent_item);
1173                         if (btrfs_file_extent_type(buf, fi) ==
1174                             BTRFS_FILE_EXTENT_INLINE)
1175                                 continue;
1176                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1177                         if (bytenr == 0)
1178                                 continue;
1179
1180                         nr_file_extents++;
1181
1182                         maybe_lock_mutex(root);
1183                         ret = process_func(trans, root, bytenr,
1184                                            orig_buf->start, buf->start,
1185                                            orig_root, ref_root,
1186                                            orig_generation, ref_generation,
1187                                            key.objectid);
1188                         maybe_unlock_mutex(root);
1189
1190                         if (ret) {
1191                                 faili = i;
1192                                 WARN_ON(1);
1193                                 goto fail;
1194                         }
1195                 } else {
1196                         bytenr = btrfs_node_blockptr(buf, i);
1197                         maybe_lock_mutex(root);
1198                         ret = process_func(trans, root, bytenr,
1199                                            orig_buf->start, buf->start,
1200                                            orig_root, ref_root,
1201                                            orig_generation, ref_generation,
1202                                            level - 1);
1203                         maybe_unlock_mutex(root);
1204                         if (ret) {
1205                                 faili = i;
1206                                 WARN_ON(1);
1207                                 goto fail;
1208                         }
1209                 }
1210         }
1211 out:
1212         if (nr_extents) {
1213                 if (level == 0)
1214                         *nr_extents = nr_file_extents;
1215                 else
1216                         *nr_extents = nritems;
1217         }
1218         return 0;
1219 fail:
1220         WARN_ON(1);
1221         return ret;
1222 }
1223
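/*
 * update the backrefs for a range of slots in buf after it changed parents,
 * moving each ref from orig_buf to buf.
 */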
1224 int btrfs_update_ref(struct btrfs_trans_handle *trans,
1225                      struct btrfs_root *root, struct extent_buffer *orig_buf,
1226                      struct extent_buffer *buf, int start_slot, int nr)
1227
1228 {
1229         u64 bytenr;
1230         u64 ref_root;
1231         u64 orig_root;
1232         u64 ref_generation;
1233         u64 orig_generation;
1234         struct btrfs_key key;
1235         struct btrfs_file_extent_item *fi;
1236         int i;
1237         int ret;
1238         int slot;
1239         int level;
1240
1241         BUG_ON(start_slot < 0);
1242         BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1243
1244         ref_root = btrfs_header_owner(buf);
1245         ref_generation = btrfs_header_generation(buf);
1246         orig_root = btrfs_header_owner(orig_buf);
1247         orig_generation = btrfs_header_generation(orig_buf);
1248         level = btrfs_header_level(buf);
1249
1250         if (!root->ref_cows) {
1251                 if (level == 0 &&
1252                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1253                         return 0;
1254                 if (level != 0 &&
1255                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1256                         return 0;
1257         }
1258
1259         for (i = 0, slot = start_slot; i < nr; i++, slot++) {
1260                 cond_resched();
1261                 if (level == 0) {
1262                         btrfs_item_key_to_cpu(buf, &key, slot);
1263                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1264                                 continue;
1265                         fi = btrfs_item_ptr(buf, slot,
1266                                             struct btrfs_file_extent_item);
1267                         if (btrfs_file_extent_type(buf, fi) ==
1268                             BTRFS_FILE_EXTENT_INLINE)
1269                                 continue;
1270                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1271                         if (bytenr == 0)
1272                                 continue;
1273                         maybe_lock_mutex(root);
1274                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1275                                             orig_buf->start, buf->start,
1276                                             orig_root, ref_root,
1277                                             orig_generation, ref_generation,
1278                                             key.objectid);
1279                         maybe_unlock_mutex(root);
1280                         if (ret)
1281                                 goto fail;
1282                 } else {
1283                         bytenr = btrfs_node_blockptr(buf, slot);
1284                         maybe_lock_mutex(root);
1285                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1286                                             orig_buf->start, buf->start,
1287                                             orig_root, ref_root,
1288                                             orig_generation, ref_generation,
1289                                             level - 1);
1290                         maybe_unlock_mutex(root);
1291                         if (ret)
1292                                 goto fail;
1293                 }
1294         }
1295         return 0;
1296 fail:
1297         WARN_ON(1);
1298         return -1;
1299 }
1300
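/* write the in-memory block group item back into the extent tree */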
1301 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1302                                  struct btrfs_root *root,
1303                                  struct btrfs_path *path,
1304                                  struct btrfs_block_group_cache *cache)
1305 {
1306         int ret;
1307         int pending_ret;
1308         struct btrfs_root *extent_root = root->fs_info->extent_root;
1309         unsigned long bi;
1310         struct extent_buffer *leaf;
1311
1312         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1313         if (ret < 0)
1314                 goto fail;
1315         BUG_ON(ret);
1316
1317         leaf = path->nodes[0];
1318         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1319         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1320         btrfs_mark_buffer_dirty(leaf);
1321         btrfs_release_path(extent_root, path);
1322 fail:
1323         finish_current_insert(trans, extent_root);
1324         pending_ret = del_pending_extents(trans, extent_root);
1325         if (ret)
1326                 return ret;
1327         if (pending_ret)
1328                 return pending_ret;
1329         return 0;
1330
1331 }
1332
1333 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1334                                    struct btrfs_root *root)
1335 {
1336         struct btrfs_block_group_cache *cache, *entry;
1337         struct rb_node *n;
1338         int err = 0;
1339         int werr = 0;
1340         struct btrfs_path *path;
1341         u64 last = 0;
1342
1343         path = btrfs_alloc_path();
1344         if (!path)
1345                 return -ENOMEM;
1346
1347         mutex_lock(&root->fs_info->alloc_mutex);
1348         while(1) {
1349                 cache = NULL;
1350                 spin_lock(&root->fs_info->block_group_cache_lock);
1351                 for (n = rb_first(&root->fs_info->block_group_cache_tree);
1352                      n; n = rb_next(n)) {
1353                         entry = rb_entry(n, struct btrfs_block_group_cache,
1354                                          cache_node);
1355                         if (entry->dirty) {
1356                                 cache = entry;
1357                                 break;
1358                         }
1359                 }
1360                 spin_unlock(&root->fs_info->block_group_cache_lock);
1361
1362                 if (!cache)
1363                         break;
1364
1365                 cache->dirty = 0;
1366                 last += cache->key.offset;
1367
1368                 err = write_one_cache_group(trans, root,
1369                                             path, cache);
1370                 /*
1371                  * if we fail to write the cache group, we want
1372                  * to keep it marked dirty in hopes that a later
1373                  * write will work
1374                  */
1375                 if (err) {
1376                         werr = err;
1377                         continue;
1378                 }
1379         }
1380         btrfs_free_path(path);
1381         mutex_unlock(&root->fs_info->alloc_mutex);
1382         return werr;
1383 }
1384
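/*
 * account the new bytes against the space_info for these flags, creating
 * the space_info if it doesn't exist yet.
 */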
1385 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1386                              u64 total_bytes, u64 bytes_used,
1387                              struct btrfs_space_info **space_info)
1388 {
1389         struct btrfs_space_info *found;
1390
1391         found = __find_space_info(info, flags);
1392         if (found) {
1393                 found->total_bytes += total_bytes;
1394                 found->bytes_used += bytes_used;
1395                 found->full = 0;
1396                 *space_info = found;
1397                 return 0;
1398         }
1399         found = kmalloc(sizeof(*found), GFP_NOFS);
1400         if (!found)
1401                 return -ENOMEM;
1402
1403         list_add(&found->list, &info->space_info);
1404         INIT_LIST_HEAD(&found->block_groups);
1405         init_rwsem(&found->groups_sem);
1406         spin_lock_init(&found->lock);
1407         found->flags = flags;
1408         found->total_bytes = total_bytes;
1409         found->bytes_used = bytes_used;
1410         found->bytes_pinned = 0;
1411         found->bytes_reserved = 0;
1412         found->full = 0;
1413         found->force_alloc = 0;
1414         *space_info = found;
1415         return 0;
1416 }
1417
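/*
 * Descriptive comment (added for clarity): remember which RAID/DUP profile
 * bits have actually been used for data, metadata and system block groups so
 * that later chunk allocations can pick a matching profile.
 */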
1418 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1419 {
1420         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1421                                    BTRFS_BLOCK_GROUP_RAID1 |
1422                                    BTRFS_BLOCK_GROUP_RAID10 |
1423                                    BTRFS_BLOCK_GROUP_DUP);
1424         if (extra_flags) {
1425                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1426                         fs_info->avail_data_alloc_bits |= extra_flags;
1427                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1428                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1429                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1430                         fs_info->avail_system_alloc_bits |= extra_flags;
1431         }
1432 }
1433
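/*
 * Descriptive comment (added for clarity): clear out profile bits that the
 * current device count cannot support and reduce mutually exclusive profiles
 * to a single choice: RAID10 wins over RAID1, the mirrored profiles win over
 * DUP, and any of those win over RAID0.
 */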
1434 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1435 {
1436         u64 num_devices = root->fs_info->fs_devices->num_devices;
1437
1438         if (num_devices == 1)
1439                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1440         if (num_devices < 4)
1441                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1442
1443         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1444             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1445                       BTRFS_BLOCK_GROUP_RAID10))) {
1446                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1447         }
1448
1449         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1450             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1451                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1452         }
1453
1454         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1455             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1456              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1457              (flags & BTRFS_BLOCK_GROUP_DUP)))
1458                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1459         return flags;
1460 }
1461
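/*
 * Descriptive comment (added for clarity): allocate a new chunk for the given
 * profile when the matching space_info is running low.  Unless the caller
 * forces it, nothing is done while used + pinned + reserved + alloc_bytes
 * stays below the div_factor(total, 6) threshold (roughly 60% of the total).
 * On ENOSPC from btrfs_alloc_chunk() the space_info is marked full so later
 * callers can skip the attempt.
 */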
1462 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1463                           struct btrfs_root *extent_root, u64 alloc_bytes,
1464                           u64 flags, int force)
1465 {
1466         struct btrfs_space_info *space_info;
1467         u64 thresh;
1468         u64 start;
1469         u64 num_bytes;
1470         int ret = 0, waited = 0;
1471
1472         flags = reduce_alloc_profile(extent_root, flags);
1473
1474         space_info = __find_space_info(extent_root->fs_info, flags);
1475         if (!space_info) {
1476                 ret = update_space_info(extent_root->fs_info, flags,
1477                                         0, 0, &space_info);
1478                 BUG_ON(ret);
1479         }
1480         BUG_ON(!space_info);
1481
1482         if (space_info->force_alloc) {
1483                 force = 1;
1484                 space_info->force_alloc = 0;
1485         }
1486         if (space_info->full)
1487                 goto out;
1488
1489         thresh = div_factor(space_info->total_bytes, 6);
1490         if (!force &&
1491            (space_info->bytes_used + space_info->bytes_pinned +
1492             space_info->bytes_reserved + alloc_bytes) < thresh)
1493                 goto out;
1494
1495         while (!mutex_trylock(&extent_root->fs_info->chunk_mutex)) {
1496                 if (!force)
1497                         goto out;
1498                 mutex_unlock(&extent_root->fs_info->alloc_mutex);
1499                 cond_resched();
1500                 mutex_lock(&extent_root->fs_info->alloc_mutex);
1501                 waited = 1;
1502         }
1503
1504         if (waited && space_info->full)
1505                 goto out_unlock;
1506
1507         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1508         if (ret == -ENOSPC) {
1509                 printk(KERN_INFO "space info full %Lu\n", flags);
1510                 space_info->full = 1;
1511                 goto out_unlock;
1512         }
1513         BUG_ON(ret);
1514
1515         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1516                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1517         BUG_ON(ret);
1518
1519 out_unlock:
1520         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1521 out:
1522         return ret;
1523 }
1524
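/*
 * Descriptive comment (added for clarity): walk the block groups covering
 * [bytenr, bytenr + num_bytes) and move their used byte counts (and the
 * owning space_info's bytes_used) up for an allocation or down for a free,
 * marking each group dirty.  When freeing with mark_free set, the range is
 * also handed back to the group's free space cache.
 */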
1525 static int update_block_group(struct btrfs_trans_handle *trans,
1526                               struct btrfs_root *root,
1527                               u64 bytenr, u64 num_bytes, int alloc,
1528                               int mark_free)
1529 {
1530         struct btrfs_block_group_cache *cache;
1531         struct btrfs_fs_info *info = root->fs_info;
1532         u64 total = num_bytes;
1533         u64 old_val;
1534         u64 byte_in_group;
1535
1536         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1537         while(total) {
1538                 cache = btrfs_lookup_block_group(info, bytenr);
1539                 if (!cache) {
1540                         return -1;
1541                 }
1542                 byte_in_group = bytenr - cache->key.objectid;
1543                 WARN_ON(byte_in_group > cache->key.offset);
1544
1545                 spin_lock(&cache->lock);
1546                 cache->dirty = 1;
1547                 old_val = btrfs_block_group_used(&cache->item);
1548                 num_bytes = min(total, cache->key.offset - byte_in_group);
1549                 if (alloc) {
1550                         old_val += num_bytes;
1551                         cache->space_info->bytes_used += num_bytes;
1552                         btrfs_set_block_group_used(&cache->item, old_val);
1553                         spin_unlock(&cache->lock);
1554                 } else {
1555                         old_val -= num_bytes;
1556                         cache->space_info->bytes_used -= num_bytes;
1557                         btrfs_set_block_group_used(&cache->item, old_val);
1558                         spin_unlock(&cache->lock);
1559                         if (mark_free) {
1560                                 int ret;
1561                                 ret = btrfs_add_free_space(cache, bytenr,
1562                                                            num_bytes);
1563                                 if (ret)
1564                                         return -1;
1565                         }
1566                 }
1567                 total -= num_bytes;
1568                 bytenr += num_bytes;
1569         }
1570         return 0;
1571 }
1572
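/*
 * Descriptive comment (added for clarity): return the objectid (logical
 * start) of the block group that btrfs_lookup_first_block_group() finds for
 * search_start, or 0 if there isn't one.
 */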
1573 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1574 {
1575         struct btrfs_block_group_cache *cache;
1576
1577         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
1578         if (!cache)
1579                 return 0;
1580
1581         return cache->key.objectid;
1582 }
1583
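/*
 * Descriptive comment (added for clarity): pin or unpin [bytenr, bytenr + num).
 * The range is set or cleared in fs_info->pinned_extents, and the pinned
 * counters of every block group it touches, their space_infos and the global
 * total_pinned are adjusted to match.
 */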
1584 int btrfs_update_pinned_extents(struct btrfs_root *root,
1585                                 u64 bytenr, u64 num, int pin)
1586 {
1587         u64 len;
1588         struct btrfs_block_group_cache *cache;
1589         struct btrfs_fs_info *fs_info = root->fs_info;
1590
1591         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1592         if (pin) {
1593                 set_extent_dirty(&fs_info->pinned_extents,
1594                                 bytenr, bytenr + num - 1, GFP_NOFS);
1595         } else {
1596                 clear_extent_dirty(&fs_info->pinned_extents,
1597                                 bytenr, bytenr + num - 1, GFP_NOFS);
1598         }
1599         while (num > 0) {
1600                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1601                 BUG_ON(!cache);
1602                 len = min(num, cache->key.offset -
1603                           (bytenr - cache->key.objectid));
1604                 if (pin) {
1605                         spin_lock(&cache->lock);
1606                         cache->pinned += len;
1607                         cache->space_info->bytes_pinned += len;
1608                         spin_unlock(&cache->lock);
1609                         fs_info->total_pinned += len;
1610                 } else {
1611                         spin_lock(&cache->lock);
1612                         cache->pinned -= len;
1613                         cache->space_info->bytes_pinned -= len;
1614                         spin_unlock(&cache->lock);
1615                         fs_info->total_pinned -= len;
1616                 }
1617                 bytenr += len;
1618                 num -= len;
1619         }
1620         return 0;
1621 }
1622
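/*
 * Descriptive comment (added for clarity): adjust the reserved byte counters
 * of the block groups (and their space_infos) covering [bytenr, bytenr + num).
 * Reserved space has been handed out to a caller but not yet turned into an
 * allocated extent or returned to the free space cache.
 */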
1623 static int update_reserved_extents(struct btrfs_root *root,
1624                                    u64 bytenr, u64 num, int reserve)
1625 {
1626         u64 len;
1627         struct btrfs_block_group_cache *cache;
1628         struct btrfs_fs_info *fs_info = root->fs_info;
1629
1630         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1631         while (num > 0) {
1632                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1633                 BUG_ON(!cache);
1634                 len = min(num, cache->key.offset -
1635                           (bytenr - cache->key.objectid));
1636                 if (reserve) {
1637                         spin_lock(&cache->lock);
1638                         cache->reserved += len;
1639                         cache->space_info->bytes_reserved += len;
1640                         spin_unlock(&cache->lock);
1641                 } else {
1642                         spin_lock(&cache->lock);
1643                         cache->reserved -= len;
1644                         cache->space_info->bytes_reserved -= len;
1645                         spin_unlock(&cache->lock);
1646                 }
1647                 bytenr += len;
1648                 num -= len;
1649         }
1650         return 0;
1651 }
1652
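/*
 * Descriptive comment (added for clarity): copy every range currently marked
 * dirty in the pinned_extents tree into 'copy', giving the caller a stable
 * snapshot of what will need to be unpinned later.
 */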
1653 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1654 {
1655         u64 last = 0;
1656         u64 start;
1657         u64 end;
1658         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1659         int ret;
1660
1661         while(1) {
1662                 ret = find_first_extent_bit(pinned_extents, last,
1663                                             &start, &end, EXTENT_DIRTY);
1664                 if (ret)
1665                         break;
1666                 set_extent_dirty(copy, start, end, GFP_NOFS);
1667                 last = end + 1;
1668         }
1669         return 0;
1670 }
1671
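/*
 * Descriptive comment (added for clarity): unpin all the ranges recorded in
 * 'unpin': update the pinned counters, clear the ranges from the tree and
 * give the space back to the free space cache of any block group that has
 * already been cached.
 */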
1672 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1673                                struct btrfs_root *root,
1674                                struct extent_io_tree *unpin)
1675 {
1676         u64 start;
1677         u64 end;
1678         int ret;
1679         struct btrfs_block_group_cache *cache;
1680
1681         mutex_lock(&root->fs_info->alloc_mutex);
1682         while(1) {
1683                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1684                                             EXTENT_DIRTY);
1685                 if (ret)
1686                         break;
1687                 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
1688                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1689                 cache = btrfs_lookup_block_group(root->fs_info, start);
1690                 if (cache->cached)
1691                         btrfs_add_free_space(cache, start, end - start + 1);
1692                 if (need_resched()) {
1693                         mutex_unlock(&root->fs_info->alloc_mutex);
1694                         cond_resched();
1695                         mutex_lock(&root->fs_info->alloc_mutex);
1696                 }
1697         }
1698         mutex_unlock(&root->fs_info->alloc_mutex);
1699         return 0;
1700 }
1701
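/*
 * Descriptive comment (added for clarity): process the pending operations
 * queued in fs_info->extent_ins.  A PENDING_EXTENT_INSERT gets its extent
 * item and back reference inserted into the extent tree; a
 * PENDING_BACKREF_UPDATE has its existing back reference re-keyed to the new
 * parent and its generation updated.
 */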
1702 static int finish_current_insert(struct btrfs_trans_handle *trans,
1703                                  struct btrfs_root *extent_root)
1704 {
1705         u64 start;
1706         u64 end;
1707         u64 priv;
1708         struct btrfs_fs_info *info = extent_root->fs_info;
1709         struct btrfs_path *path;
1710         struct btrfs_extent_ref *ref;
1711         struct pending_extent_op *extent_op;
1712         struct btrfs_key key;
1713         struct btrfs_extent_item extent_item;
1714         int ret;
1715         int err = 0;
1716
1717         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1718         btrfs_set_stack_extent_refs(&extent_item, 1);
1719         path = btrfs_alloc_path();
1720
1721         while(1) {
1722                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1723                                             &end, EXTENT_LOCKED);
1724                 if (ret)
1725                         break;
1726
1727                 ret = get_state_private(&info->extent_ins, start, &priv);
1728                 BUG_ON(ret);
1729                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
1730
1731                 if (extent_op->type == PENDING_EXTENT_INSERT) {
1732                         key.objectid = start;
1733                         key.offset = end + 1 - start;
1734                         key.type = BTRFS_EXTENT_ITEM_KEY;
1735                         err = btrfs_insert_item(trans, extent_root, &key,
1736                                         &extent_item, sizeof(extent_item));
1737                         BUG_ON(err);
1738
1739                         clear_extent_bits(&info->extent_ins, start, end,
1740                                           EXTENT_LOCKED, GFP_NOFS);
1741
1742                         err = insert_extent_backref(trans, extent_root, path,
1743                                                 start, extent_op->parent,
1744                                                 extent_root->root_key.objectid,
1745                                                 extent_op->generation,
1746                                                 extent_op->level);
1747                         BUG_ON(err);
1748                 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
1749                         err = lookup_extent_backref(trans, extent_root, path,
1750                                                 start, extent_op->orig_parent,
1751                                                 extent_root->root_key.objectid,
1752                                                 extent_op->orig_generation,
1753                                                 extent_op->level, 0);
1754                         BUG_ON(err);
1755
1756                         clear_extent_bits(&info->extent_ins, start, end,
1757                                           EXTENT_LOCKED, GFP_NOFS);
1758
1759                         key.objectid = start;
1760                         key.offset = extent_op->parent;
1761                         key.type = BTRFS_EXTENT_REF_KEY;
1762                         err = btrfs_set_item_key_safe(trans, extent_root, path,
1763                                                       &key);
1764                         BUG_ON(err);
1765                         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1766                                              struct btrfs_extent_ref);
1767                         btrfs_set_ref_generation(path->nodes[0], ref,
1768                                                  extent_op->generation);
1769                         btrfs_mark_buffer_dirty(path->nodes[0]);
1770                         btrfs_release_path(extent_root, path);
1771                 } else {
1772                         BUG_ON(1);
1773                 }
1774                 kfree(extent_op);
1775
1776                 if (need_resched()) {
1777                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
1778                         cond_resched();
1779                         mutex_lock(&extent_root->fs_info->alloc_mutex);
1780                 }
1781         }
1782         btrfs_free_path(path);
1783         return 0;
1784 }
1785
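/*
 * Descriptive comment (added for clarity): pin [bytenr, bytenr + num_bytes).
 * As a shortcut, a metadata block from this transaction that has never been
 * written (and is not owned by the log or reloc trees) is cleaned and
 * returned with 1 so the caller can reuse the space immediately instead of
 * pinning it.
 */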
1786 static int pin_down_bytes(struct btrfs_trans_handle *trans,
1787                           struct btrfs_root *root,
1788                           u64 bytenr, u64 num_bytes, int is_data)
1789 {
1790         int err = 0;
1791         struct extent_buffer *buf;
1792
1793         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1794         if (is_data)
1795                 goto pinit;
1796
1797         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1798         if (!buf)
1799                 goto pinit;
1800
1801         /* we can reuse a block if it hasn't been written
1802          * and it is from this transaction.  We can't
1803          * reuse anything from the tree log root because
1804          * it has tiny sub-transactions.
1805          */
1806         if (btrfs_buffer_uptodate(buf, 0) &&
1807             btrfs_try_tree_lock(buf)) {
1808                 u64 header_owner = btrfs_header_owner(buf);
1809                 u64 header_transid = btrfs_header_generation(buf);
1810                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
1811                     header_owner != BTRFS_TREE_RELOC_OBJECTID &&
1812                     header_transid == trans->transid &&
1813                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
1814                         clean_tree_block(NULL, root, buf);
1815                         btrfs_tree_unlock(buf);
1816                         free_extent_buffer(buf);
1817                         return 1;
1818                 }
1819                 btrfs_tree_unlock(buf);
1820         }
1821         free_extent_buffer(buf);
1822 pinit:
1823         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
1824
1825         BUG_ON(err < 0);
1826         return 0;
1827 }
1828
1829 /*
1830  * remove an extent from the root, returns 0 on success
1831  */
1832 static int __free_extent(struct btrfs_trans_handle *trans,
1833                          struct btrfs_root *root,
1834                          u64 bytenr, u64 num_bytes, u64 parent,
1835                          u64 root_objectid, u64 ref_generation,
1836                          u64 owner_objectid, int pin, int mark_free)
1837 {
1838         struct btrfs_path *path;
1839         struct btrfs_key key;
1840         struct btrfs_fs_info *info = root->fs_info;
1841         struct btrfs_root *extent_root = info->extent_root;
1842         struct extent_buffer *leaf;
1843         int ret;
1844         int extent_slot = 0;
1845         int found_extent = 0;
1846         int num_to_del = 1;
1847         struct btrfs_extent_item *ei;
1848         u32 refs;
1849
1850         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1851         key.objectid = bytenr;
1852         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1853         key.offset = num_bytes;
1854         path = btrfs_alloc_path();
1855         if (!path)
1856                 return -ENOMEM;
1857
1858         path->reada = 1;
1859         ret = lookup_extent_backref(trans, extent_root, path,
1860                                     bytenr, parent, root_objectid,
1861                                     ref_generation, owner_objectid, 1);
1862         if (ret == 0) {
1863                 struct btrfs_key found_key;
1864                 extent_slot = path->slots[0];
1865                 while(extent_slot > 0) {
1866                         extent_slot--;
1867                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1868                                               extent_slot);
1869                         if (found_key.objectid != bytenr)
1870                                 break;
1871                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1872                             found_key.offset == num_bytes) {
1873                                 found_extent = 1;
1874                                 break;
1875                         }
1876                         if (path->slots[0] - extent_slot > 5)
1877                                 break;
1878                 }
1879                 if (!found_extent) {
1880                         ret = remove_extent_backref(trans, extent_root, path);
1881                         BUG_ON(ret);
1882                         btrfs_release_path(extent_root, path);
1883                         ret = btrfs_search_slot(trans, extent_root,
1884                                                 &key, path, -1, 1);
1885                         BUG_ON(ret);
1886                         extent_slot = path->slots[0];
1887                 }
1888         } else {
1889                 btrfs_print_leaf(extent_root, path->nodes[0]);
1890                 WARN_ON(1);
1891                 printk("Unable to find ref byte nr %Lu root %Lu "
1892                        "gen %Lu owner %Lu\n", bytenr,
1893                        root_objectid, ref_generation, owner_objectid);
1894         }
1895
1896         leaf = path->nodes[0];
1897         ei = btrfs_item_ptr(leaf, extent_slot,
1898                             struct btrfs_extent_item);
1899         refs = btrfs_extent_refs(leaf, ei);
1900         BUG_ON(refs == 0);
1901         refs -= 1;
1902         btrfs_set_extent_refs(leaf, ei, refs);
1903
1904         btrfs_mark_buffer_dirty(leaf);
1905
1906         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1907                 struct btrfs_extent_ref *ref;
1908                 ref = btrfs_item_ptr(leaf, path->slots[0],
1909                                      struct btrfs_extent_ref);
1910                 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
1911                 /* if the back ref and the extent are next to each other
1912                  * they get deleted below in one shot
1913                  */
1914                 path->slots[0] = extent_slot;
1915                 num_to_del = 2;
1916         } else if (found_extent) {
1917                 /* otherwise delete the extent back ref */
1918                 ret = remove_extent_backref(trans, extent_root, path);
1919                 BUG_ON(ret);
1920                 /* if refs are 0, we need to set up the path for deletion */
1921                 if (refs == 0) {
1922                         btrfs_release_path(extent_root, path);
1923                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1924                                                 -1, 1);
1925                         BUG_ON(ret);
1926                 }
1927         }
1928
1929         if (refs == 0) {
1930                 u64 super_used;
1931                 u64 root_used;
1932 #ifdef BIO_RW_DISCARD
1933                 u64 map_length = num_bytes;
1934                 struct btrfs_multi_bio *multi = NULL;
1935 #endif
1936
1937                 if (pin) {
1938                         ret = pin_down_bytes(trans, root, bytenr, num_bytes,
1939                                 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
1940                         if (ret > 0)
1941                                 mark_free = 1;
1942                         BUG_ON(ret < 0);
1943                 }
1944
1945                 /* block accounting for super block */
1946                 spin_lock_irq(&info->delalloc_lock);
1947                 super_used = btrfs_super_bytes_used(&info->super_copy);
1948                 btrfs_set_super_bytes_used(&info->super_copy,
1949                                            super_used - num_bytes);
1950                 spin_unlock_irq(&info->delalloc_lock);
1951
1952                 /* block accounting for root item */
1953                 root_used = btrfs_root_used(&root->root_item);
1954                 btrfs_set_root_used(&root->root_item,
1955                                            root_used - num_bytes);
1956                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1957                                       num_to_del);
1958                 BUG_ON(ret);
1959                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1960                                          mark_free);
1961                 BUG_ON(ret);
1962
1963 #ifdef BIO_RW_DISCARD
1964                 /* Tell the block device(s) that the sectors can be discarded */
1965                 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
1966                                       bytenr, &map_length, &multi, 0);
1967                 if (!ret) {
1968                         struct btrfs_bio_stripe *stripe = multi->stripes;
1969                         int i;
1970
1971                         if (map_length > num_bytes)
1972                                 map_length = num_bytes;
1973
1974                         for (i = 0; i < multi->num_stripes; i++, stripe++) {
1975                                 blkdev_issue_discard(stripe->dev->bdev,
1976                                                      stripe->physical >> 9,
1977                                                      map_length >> 9);
1978                         }
1979                         kfree(multi);
1980                 }
1981 #endif
1982         }
1983         btrfs_free_path(path);
1984         finish_current_insert(trans, extent_root);
1985         return ret;
1986 }
1987
1988 /*
1989  * find all the extents marked as pending deletion and remove
1990  * them from the extent allocation tree
1991  */
1992 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1993                                btrfs_root *extent_root)
1994 {
1995         int ret;
1996         int err = 0;
1997         int mark_free = 0;
1998         u64 start;
1999         u64 end;
2000         u64 priv;
2001         struct extent_io_tree *pending_del;
2002         struct extent_io_tree *extent_ins;
2003         struct pending_extent_op *extent_op;
2004
2005         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2006         extent_ins = &extent_root->fs_info->extent_ins;
2007         pending_del = &extent_root->fs_info->pending_del;
2008
2009         while(1) {
2010                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
2011                                             EXTENT_LOCKED);
2012                 if (ret)
2013                         break;
2014
2015                 ret = get_state_private(pending_del, start, &priv);
2016                 BUG_ON(ret);
2017                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2018
2019                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
2020                                   GFP_NOFS);
2021
2022                 ret = pin_down_bytes(trans, extent_root, start,
2023                                      end + 1 - start, 0);
2024                 mark_free = ret > 0;
2025                 if (!test_range_bit(extent_ins, start, end,
2026                                     EXTENT_LOCKED, 0)) {
2027 free_extent:
2028                         ret = __free_extent(trans, extent_root,
2029                                             start, end + 1 - start,
2030                                             extent_op->orig_parent,
2031                                             extent_root->root_key.objectid,
2032                                             extent_op->orig_generation,
2033                                             extent_op->level, 0, mark_free);
2034                         kfree(extent_op);
2035                 } else {
2036                         kfree(extent_op);
2037                         ret = get_state_private(extent_ins, start, &priv);
2038                         BUG_ON(ret);
2039                         extent_op = (struct pending_extent_op *)
2040                                                         (unsigned long)priv;
2041
2042                         clear_extent_bits(extent_ins, start, end,
2043                                           EXTENT_LOCKED, GFP_NOFS);
2044
2045                         if (extent_op->type == PENDING_BACKREF_UPDATE)
2046                                 goto free_extent;
2047
2048                         ret = update_block_group(trans, extent_root, start,
2049                                                 end + 1 - start, 0, mark_free);
2050                         BUG_ON(ret);
2051                         kfree(extent_op);
2052                 }
2053                 if (ret)
2054                         err = ret;
2055
2056                 if (need_resched()) {
2057                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2058                         cond_resched();
2059                         mutex_lock(&extent_root->fs_info->alloc_mutex);
2060                 }
2061         }
2062         return err;
2063 }
2064
2065 /*
2066  * remove an extent from the root, returns 0 on success
2067  */
2068 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2069                                struct btrfs_root *root,
2070                                u64 bytenr, u64 num_bytes, u64 parent,
2071                                u64 root_objectid, u64 ref_generation,
2072                                u64 owner_objectid, int pin)
2073 {
2074         struct btrfs_root *extent_root = root->fs_info->extent_root;
2075         int pending_ret;
2076         int ret;
2077
2078         WARN_ON(num_bytes < root->sectorsize);
2079         if (root == extent_root) {
2080                 struct pending_extent_op *extent_op;
2081
2082                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2083                 BUG_ON(!extent_op);
2084
2085                 extent_op->type = PENDING_EXTENT_DELETE;
2086                 extent_op->bytenr = bytenr;
2087                 extent_op->num_bytes = num_bytes;
2088                 extent_op->parent = parent;
2089                 extent_op->orig_parent = parent;
2090                 extent_op->generation = ref_generation;
2091                 extent_op->orig_generation = ref_generation;
2092                 extent_op->level = (int)owner_objectid;
2093
2094                 set_extent_bits(&root->fs_info->pending_del,
2095                                 bytenr, bytenr + num_bytes - 1,
2096                                 EXTENT_LOCKED, GFP_NOFS);
2097                 set_state_private(&root->fs_info->pending_del,
2098                                   bytenr, (unsigned long)extent_op);
2099                 return 0;
2100         }
2101         /* if this is metadata, always pin it */
2102         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2103                 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2104                         struct btrfs_block_group_cache *cache;
2105
2106                         /* btrfs_free_reserved_extent */
2107                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
2108                         BUG_ON(!cache);
2109                         btrfs_add_free_space(cache, bytenr, num_bytes);
2110                         update_reserved_extents(root, bytenr, num_bytes, 0);
2111                         return 0;
2112                 }
2113                 pin = 1;
2114         }
2115
2116         /* if this is data, pin it if any earlier transaction may have committed it */
2117         if (ref_generation != trans->transid)
2118                 pin = 1;
2119
2120         ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2121                             root_objectid, ref_generation,
2122                             owner_objectid, pin, pin == 0);
2123
2124         finish_current_insert(trans, root->fs_info->extent_root);
2125         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
2126         return ret ? ret : pending_ret;
2127 }
2128
2129 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2130                       struct btrfs_root *root,
2131                       u64 bytenr, u64 num_bytes, u64 parent,
2132                       u64 root_objectid, u64 ref_generation,
2133                       u64 owner_objectid, int pin)
2134 {
2135         int ret;
2136
2137         maybe_lock_mutex(root);
2138         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
2139                                   root_objectid, ref_generation,
2140                                   owner_objectid, pin);
2141         maybe_unlock_mutex(root);
2142         return ret;
2143 }
2144
2145 static u64 stripe_align(struct btrfs_root *root, u64 val)
2146 {
2147         u64 mask = ((u64)root->stripesize - 1);
2148         u64 ret = (val + mask) & ~mask;
2149         return ret;
2150 }
2151
2152 /*
2153  * walks the btree of allocated extents and finds a hole of a given size.
2154  * The key ins is changed to record the hole:
2155  * ins->objectid == block start
2156  * ins->flags = BTRFS_EXTENT_ITEM_KEY
2157  * ins->offset == number of blocks
2158  * Any available blocks before search_start are skipped.
2159  */
2160 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2161                                      struct btrfs_root *orig_root,
2162                                      u64 num_bytes, u64 empty_size,
2163                                      u64 search_start, u64 search_end,
2164                                      u64 hint_byte, struct btrfs_key *ins,
2165                                      u64 exclude_start, u64 exclude_nr,
2166                                      int data)
2167 {
2168         int ret = 0;
2169         struct btrfs_root * root = orig_root->fs_info->extent_root;
2170         u64 total_needed = num_bytes;
2171         u64 *last_ptr = NULL;
2172         struct btrfs_block_group_cache *block_group = NULL;
2173         int chunk_alloc_done = 0;
2174         int empty_cluster = 2 * 1024 * 1024;
2175         int allowed_chunk_alloc = 0;
2176         struct list_head *head = NULL, *cur = NULL;
2177         int loop = 0;
2178         struct btrfs_space_info *space_info;
2179
2180         WARN_ON(num_bytes < root->sectorsize);
2181         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2182         ins->objectid = 0;
2183         ins->offset = 0;
2184
2185         if (orig_root->ref_cows || empty_size)
2186                 allowed_chunk_alloc = 1;
2187
2188         if (data & BTRFS_BLOCK_GROUP_METADATA) {
2189                 last_ptr = &root->fs_info->last_alloc;
2190                 empty_cluster = 256 * 1024;
2191         }
2192
2193         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
2194                 last_ptr = &root->fs_info->last_data_alloc;
2195
2196         if (last_ptr) {
2197                 if (*last_ptr)
2198                         hint_byte = *last_ptr;
2199                 else
2200                         empty_size += empty_cluster;
2201         }
2202         search_start = max(search_start, first_logical_byte(root, 0));
2203         search_start = max(search_start, hint_byte);
2204         total_needed += empty_size;
2205
2206         block_group = btrfs_lookup_block_group(root->fs_info, search_start);
2207         space_info = __find_space_info(root->fs_info, data);
2208
2209         down_read(&space_info->groups_sem);
2210         while (1) {
2211                 struct btrfs_free_space *free_space;
2212                 /*
2213                  * the only way this happens is if our hint points to a
2214                  * block group that's not of the proper type; while looping,
2215                  * this should never happen
2216                  */
2217                 if (unlikely(!block_group_bits(block_group, data)))
2218                         goto new_group;
2219
2220                 ret = cache_block_group(root, block_group);
2221                 if (ret)
2222                         break;
2223
2224                 if (block_group->ro)
2225                         goto new_group;
2226
2227                 free_space = btrfs_find_free_space(block_group, search_start,
2228                                                    total_needed);
2229                 if (free_space) {
2230                         u64 start = block_group->key.objectid;
2231                         u64 end = block_group->key.objectid +
2232                                 block_group->key.offset;
2233
2234                         search_start = stripe_align(root, free_space->offset);
2235
2236                         /* move on to the next group */
2237                         if (search_start + num_bytes >= search_end)
2238                                 goto new_group;
2239
2240                         /* the extent would overflow this block group, move on */
2241                         if (search_start + num_bytes > end)
2242                                 goto new_group;
2243
2244                         if (exclude_nr > 0 &&
2245                             (search_start + num_bytes > exclude_start &&
2246                              search_start < exclude_start + exclude_nr)) {
2247                                 search_start = exclude_start + exclude_nr;
2248                                 /*
2249                                  * if search_start is still in this block group
2250                                  * then we just re-search this block group
2251                                  */
2252                                 if (search_start >= start &&
2253                                     search_start < end)
2254                                         continue;
2255
2256                                 /* else we go to the next block group */
2257                                 goto new_group;
2258                         }
2259
2260                         ins->objectid = search_start;
2261                         ins->offset = num_bytes;
2262                         /* we are all good, let's return */
2263                         break;
2264                 }
2265 new_group:
2266                 /*
2267                  * Here's how this works.
2268                  * loop == 0: we were searching a block group via a hint
2269                  *              and didn't find anything, so we start at
2270                  *              the head of the block groups and keep searching
2271                  * loop == 1: we're searching through all of the block groups
2272                  *              if we hit the head again we have searched
2273                  *              all of the block groups for this space and we
2274                  *              need to try to allocate; if we can't, error out.
2275                  * loop == 2: we allocated more space and are looping through
2276                  *              all of the block groups again.
2277                  */
2278                 if (loop == 0) {
2279                         head = &space_info->block_groups;
2280                         cur = head->next;
2281
2282                         if (last_ptr && *last_ptr) {
2283                                 total_needed += empty_cluster;
2284                                 *last_ptr = 0;
2285                         }
2286                         loop++;
2287                 } else if (loop == 1 && cur == head) {
2288                         if (allowed_chunk_alloc && !chunk_alloc_done) {
2289                                 up_read(&space_info->groups_sem);
2290                                 ret = do_chunk_alloc(trans, root, num_bytes +
2291                                                      2 * 1024 * 1024, data, 1);
2292                                 if (ret < 0)
2293                                         break;
2294                                 down_read(&space_info->groups_sem);
2295                                 loop++;
2296                                 head = &space_info->block_groups;
2297                                 cur = head->next;
2298                                 chunk_alloc_done = 1;
2299                         } else if (!allowed_chunk_alloc) {
2300                                 space_info->force_alloc = 1;
2301                                 break;
2302                         } else {
2303                                 break;
2304                         }
2305                 } else if (cur == head) {
2306                         break;
2307                 }
2308
2309                 block_group = list_entry(cur, struct btrfs_block_group_cache,
2310                                          list);
2311                 search_start = block_group->key.objectid;
2312                 cur = cur->next;
2313         }
2314
2315         /* we found what we needed */
2316         if (ins->objectid) {
2317                 if (!(data & BTRFS_BLOCK_GROUP_DATA))
2318                         trans->block_group = block_group;
2319
2320                 if (last_ptr)
2321                         *last_ptr = ins->objectid + ins->offset;
2322                 ret = 0;
2323         } else if (!ret) {
2324                 ret = -ENOSPC;
2325         }
2326
2327         up_read(&space_info->groups_sem);
2328         return ret;
2329 }
2330
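/*
 * Descriptive comment (added for clarity): debugging helper used when an
 * allocation fails; print the free space left in the space_info and the
 * used/pinned/reserved totals of each of its block groups, along with their
 * free space cache contents.
 */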
2331 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2332 {
2333         struct btrfs_block_group_cache *cache;
2334         struct list_head *l;
2335
2336         printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
2337                info->total_bytes - info->bytes_used - info->bytes_pinned -
2338                info->bytes_reserved, (info->full) ? "" : "not ");
2339
2340         down_read(&info->groups_sem);
2341         list_for_each(l, &info->block_groups) {
2342                 cache = list_entry(l, struct btrfs_block_group_cache, list);
2343                 spin_lock(&cache->lock);
2344                 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
2345                        "%Lu pinned %Lu reserved\n",
2346                        cache->key.objectid, cache->key.offset,
2347                        btrfs_block_group_used(&cache->item),
2348                        cache->pinned, cache->reserved);
2349                 btrfs_dump_free_space(cache, bytes);
2350                 spin_unlock(&cache->lock);
2351         }
2352         up_read(&info->groups_sem);
2353 }
2354
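/*
 * Descriptive comment (added for clarity): pick an allocation profile for the
 * request (data, system or metadata), optionally pre-allocate chunks and call
 * find_free_extent().  On ENOSPC the request is halved (never below
 * min_alloc_size) and retried after forcing a chunk allocation; a final
 * failure dumps the space_info and BUG()s.  The winning range is removed from
 * the block group's free space cache before returning.
 */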
2355 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2356                                   struct btrfs_root *root,
2357                                   u64 num_bytes, u64 min_alloc_size,
2358                                   u64 empty_size, u64 hint_byte,
2359                                   u64 search_end, struct btrfs_key *ins,
2360                                   u64 data)
2361 {
2362         int ret;
2363         u64 search_start = 0;
2364         u64 alloc_profile;
2365         struct btrfs_fs_info *info = root->fs_info;
2366         struct btrfs_block_group_cache *cache;
2367
2368         if (data) {
2369                 alloc_profile = info->avail_data_alloc_bits &
2370                                 info->data_alloc_profile;
2371                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2372         } else if (root == root->fs_info->chunk_root) {
2373                 alloc_profile = info->avail_system_alloc_bits &
2374                                 info->system_alloc_profile;
2375                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2376         } else {
2377                 alloc_profile = info->avail_metadata_alloc_bits &
2378                                 info->metadata_alloc_profile;
2379                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2380         }
2381 again:
2382         data = reduce_alloc_profile(root, data);
2383         /*
2384          * the only place that sets empty_size is btrfs_realloc_node, which
2385          * is not called recursively on allocations
2386          */
2387         if (empty_size || root->ref_cows) {
2388                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
2389                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2390                                      2 * 1024 * 1024,
2391                                      BTRFS_BLOCK_GROUP_METADATA |
2392                                      (info->metadata_alloc_profile &
2393                                       info->avail_metadata_alloc_bits), 0);
2394                 }
2395                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2396                                      num_bytes + 2 * 1024 * 1024, data, 0);
2397         }
2398
2399         WARN_ON(num_bytes < root->sectorsize);
2400         ret = find_free_extent(trans, root, num_bytes, empty_size,
2401                                search_start, search_end, hint_byte, ins,
2402                                trans->alloc_exclude_start,
2403                                trans->alloc_exclude_nr, data);
2404
2405         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
2406                 num_bytes = num_bytes >> 1;
2407                 num_bytes = num_bytes & ~(root->sectorsize - 1);
2408                 num_bytes = max(num_bytes, min_alloc_size);
2409                 do_chunk_alloc(trans, root->fs_info->extent_root,
2410                                num_bytes, data, 1);
2411                 goto again;
2412         }
2413         if (ret) {
2414                 struct btrfs_space_info *sinfo;
2415
2416                 sinfo = __find_space_info(root->fs_info, data);
2417                 printk("allocation failed flags %Lu, wanted %Lu\n",
2418                        data, num_bytes);
2419                 dump_space_info(sinfo, num_bytes);
2420                 BUG();
2421         }
2422         cache = btrfs_lookup_block_group(root->fs_info, ins->objectid);
2423         if (!cache) {
2424                 printk(KERN_ERR "Unable to find block group for %Lu\n", ins->objectid);
2425                 return -ENOSPC;
2426         }
2427
2428         ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset);
2429
2430         return ret;
2431 }
2432
2433 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
2434 {
2435         struct btrfs_block_group_cache *cache;
2436
2437         maybe_lock_mutex(root);
2438         cache = btrfs_lookup_block_group(root->fs_info, start);
2439         if (!cache) {
2440                 printk(KERN_ERR "Unable to find block group for %Lu\n", start);
2441                 maybe_unlock_mutex(root);
2442                 return -ENOSPC;
2443         }
2444         btrfs_add_free_space(cache, start, len);
2445         update_reserved_extents(root, start, len, 0);
2446         maybe_unlock_mutex(root);
2447         return 0;
2448 }
2449
2450 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2451                                   struct btrfs_root *root,
2452                                   u64 num_bytes, u64 min_alloc_size,
2453                                   u64 empty_size, u64 hint_byte,
2454                                   u64 search_end, struct btrfs_key *ins,
2455                                   u64 data)
2456 {
2457         int ret;
2458         maybe_lock_mutex(root);
2459         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2460                                      empty_size, hint_byte, search_end, ins,
2461                                      data);
2462         update_reserved_extents(root, ins->objectid, ins->offset, 1);
2463         maybe_unlock_mutex(root);
2464         return ret;
2465 }
2466
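/*
 * Descriptive comment (added for clarity): turn the reserved range described
 * by 'ins' into a real allocation.  The bytes are accounted in the super
 * block and root item, then either a PENDING_EXTENT_INSERT is queued (when
 * allocating from the extent root itself) or the extent item and its back
 * reference are inserted directly, and finally the block group's used
 * counters are bumped.
 */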
2467 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2468                                          struct btrfs_root *root, u64 parent,
2469                                          u64 root_objectid, u64 ref_generation,
2470                                          u64 owner, struct btrfs_key *ins)
2471 {
2472         int ret;
2473         int pending_ret;
2474         u64 super_used;
2475         u64 root_used;
2476         u64 num_bytes = ins->offset;
2477         u32 sizes[2];
2478         struct btrfs_fs_info *info = root->fs_info;
2479         struct btrfs_root *extent_root = info->extent_root;
2480         struct btrfs_extent_item *extent_item;
2481         struct btrfs_extent_ref *ref;
2482         struct btrfs_path *path;
2483         struct btrfs_key keys[2];
2484
2485         if (parent == 0)
2486                 parent = ins->objectid;
2487
2488         /* block accounting for super block */
2489         spin_lock_irq(&info->delalloc_lock);
2490         super_used = btrfs_super_bytes_used(&info->super_copy);
2491         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2492         spin_unlock_irq(&info->delalloc_lock);
2493
2494         /* block accounting for root item */
2495         root_used = btrfs_root_used(&root->root_item);
2496         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
2497
2498         if (root == extent_root) {
2499                 struct pending_extent_op *extent_op;
2500
2501                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2502                 BUG_ON(!extent_op);
2503
2504                 extent_op->type = PENDING_EXTENT_INSERT;
2505                 extent_op->bytenr = ins->objectid;
2506                 extent_op->num_bytes = ins->offset;
2507                 extent_op->parent = parent;
2508                 extent_op->orig_parent = 0;
2509                 extent_op->generation = ref_generation;
2510                 extent_op->orig_generation = 0;
2511                 extent_op->level = (int)owner;
2512
2513                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2514                                 ins->objectid + ins->offset - 1,
2515                                 EXTENT_LOCKED, GFP_NOFS);
2516                 set_state_private(&root->fs_info->extent_ins,
2517                                   ins->objectid, (unsigned long)extent_op);
2518                 goto update_block;
2519         }
2520
2521         memcpy(&keys[0], ins, sizeof(*ins));
2522         keys[1].objectid = ins->objectid;
2523         keys[1].type = BTRFS_EXTENT_REF_KEY;
2524         keys[1].offset = parent;
2525         sizes[0] = sizeof(*extent_item);
2526         sizes[1] = sizeof(*ref);
2527
2528         path = btrfs_alloc_path();
2529         BUG_ON(!path);
2530
2531         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2532                                        sizes, 2);
2533         BUG_ON(ret);
2534
2535         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2536                                      struct btrfs_extent_item);
2537         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2538         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2539                              struct btrfs_extent_ref);
2540
2541         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2542         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2543         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2544         btrfs_set_ref_num_refs(path->nodes[0], ref, 1);
2545
2546         btrfs_mark_buffer_dirty(path->nodes[0]);
2547
2548         trans->alloc_exclude_start = 0;
2549         trans->alloc_exclude_nr = 0;
2550         btrfs_free_path(path);
2551         finish_current_insert(trans, extent_root);
2552         pending_ret = del_pending_extents(trans, extent_root);
2553
2554         if (ret)
2555                 goto out;
2556         if (pending_ret) {
2557                 ret = pending_ret;
2558                 goto out;
2559         }
2560
2561 update_block:
2562         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
2563         if (ret) {
2564                 printk("update block group failed for %Lu %Lu\n",
2565                        ins->objectid, ins->offset);
2566                 BUG();
2567         }
2568 out:
2569         return ret;
2570 }
2571
2572 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2573                                 struct btrfs_root *root, u64 parent,
2574                                 u64 root_objectid, u64 ref_generation,
2575                                 u64 owner, struct btrfs_key *ins)
2576 {
2577         int ret;
2578
2579         if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
2580                 return 0;
2581         maybe_lock_mutex(root);
2582         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
2583                                             ref_generation, owner, ins);
2584         update_reserved_extents(root, ins->objectid, ins->offset, 0);
2585         maybe_unlock_mutex(root);
2586         return ret;
2587 }
2588
2589 /*
2590  * this is used by the tree logging recovery code.  It records that
2591  * an extent has been allocated and makes sure to clear the free
2592  * space cache bits as well
2593  */
2594 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
2595                                 struct btrfs_root *root, u64 parent,
2596                                 u64 root_objectid, u64 ref_generation,
2597                                 u64 owner, struct btrfs_key *ins)
2598 {
2599         int ret;
2600         struct btrfs_block_group_cache *block_group;
2601
2602         maybe_lock_mutex(root);
2603         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
2604         cache_block_group(root, block_group);
2605
2606         ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset);
2607         BUG_ON(ret);
2608         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
2609                                             ref_generation, owner, ins);
2610         maybe_unlock_mutex(root);
2611         return ret;
2612 }
2613
2614 /*
2615  * finds a free extent and does all the dirty work required for
2616  * allocation.  The key for the new extent is returned through
2617  * ins.
2618  *
2619  * returns 0 if everything worked, non-zero otherwise.
2620  */
2621 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2622                        struct btrfs_root *root,
2623                        u64 num_bytes, u64 parent, u64 min_alloc_size,
2624                        u64 root_objectid, u64 ref_generation,
2625                        u64 owner_objectid, u64 empty_size, u64 hint_byte,
2626                        u64 search_end, struct btrfs_key *ins, u64 data)
2627 {
2628         int ret;
2629
2630         maybe_lock_mutex(root);
2631
2632         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2633                                      min_alloc_size, empty_size, hint_byte,
2634                                      search_end, ins, data);
2635         BUG_ON(ret);
2636         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
2637                 ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2638                                         root_objectid, ref_generation,
2639                                         owner_objectid, ins);
2640                 BUG_ON(ret);
2641
2642         } else {
2643                 update_reserved_extents(root, ins->objectid, ins->offset, 1);
2644         }
2645         maybe_unlock_mutex(root);
2646         return ret;
2647 }
2648
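/*
 * Descriptive comment (added for clarity): set up the extent buffer for a
 * freshly allocated tree block: stamp the transaction id, lock and clean the
 * block, mark it uptodate and record it as dirty in either the log root's
 * dirty_log_pages or the transaction's dirty_pages.  The buffer is returned
 * locked.
 */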
2649 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
2650                                             struct btrfs_root *root,
2651                                             u64 bytenr, u32 blocksize)
2652 {
2653         struct extent_buffer *buf;
2654
2655         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
2656         if (!buf)
2657                 return ERR_PTR(-ENOMEM);
2658         btrfs_set_header_generation(buf, trans->transid);
2659         btrfs_tree_lock(buf);
2660         clean_tree_block(trans, root, buf);
2661         btrfs_set_buffer_uptodate(buf);
2662         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2663                 set_extent_dirty(&root->dirty_log_pages, buf->start,
2664                          buf->start + buf->len - 1, GFP_NOFS);
2665         } else {
2666                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2667                          buf->start + buf->len - 1, GFP_NOFS);
2668         }
2669         trans->blocks_used++;
2670         return buf;
2671 }
2672
2673 /*
2674  * helper function to allocate a block for a given tree
2675  * returns the tree buffer or an ERR_PTR on failure.
2676  */
2677 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2678                                              struct btrfs_root *root,
2679                                              u32 blocksize, u64 parent,
2680                                              u64 root_objectid,
2681                                              u64 ref_generation,
2682                                              int level,
2683                                              u64 hint,
2684                                              u64 empty_size)
2685 {
2686         struct btrfs_key ins;
2687         int ret;
2688         struct extent_buffer *buf;
2689
2690         ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
2691                                  root_objectid, ref_generation, level,
2692                                  empty_size, hint, (u64)-1, &ins, 0);
2693         if (ret) {
2694                 BUG_ON(ret > 0);
2695                 return ERR_PTR(ret);
2696         }
2697
2698         buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
2699         return buf;
2700 }
2701
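/*
 * walk every file extent item in a leaf and drop the disk reference on
 * each data extent it points to.  Inline extents and holes (a disk
 * bytenr of zero) are skipped.
 */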
2702 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
2703                         struct btrfs_root *root, struct extent_buffer *leaf)
2704 {
2705         u64 leaf_owner;
2706         u64 leaf_generation;
2707         struct btrfs_key key;
2708         struct btrfs_file_extent_item *fi;
2709         int i;
2710         int nritems;
2711         int ret;
2712
2713         BUG_ON(!btrfs_is_leaf(leaf));
2714         nritems = btrfs_header_nritems(leaf);
2715         leaf_owner = btrfs_header_owner(leaf);
2716         leaf_generation = btrfs_header_generation(leaf);
2717
2718         for (i = 0; i < nritems; i++) {
2719                 u64 disk_bytenr;
2720                 cond_resched();
2721
2722                 btrfs_item_key_to_cpu(leaf, &key, i);
2723                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2724                         continue;
2725                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2726                 if (btrfs_file_extent_type(leaf, fi) ==
2727                     BTRFS_FILE_EXTENT_INLINE)
2728                         continue;
2729                 /*
2730                  * FIXME make sure to insert a trans record that
2731                  * repeats the snapshot del on crash
2732                  */
2733                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2734                 if (disk_bytenr == 0)
2735                         continue;
2736
2737                 mutex_lock(&root->fs_info->alloc_mutex);
2738                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2739                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2740                                 leaf->start, leaf_owner, leaf_generation,
2741                                 key.objectid, 0);
2742                 mutex_unlock(&root->fs_info->alloc_mutex);
2743                 BUG_ON(ret);
2744
2745                 atomic_inc(&root->fs_info->throttle_gen);
2746                 wake_up(&root->fs_info->transaction_throttle);
2747                 cond_resched();
2748         }
2749         return 0;
2750 }
2751
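/*
 * same as btrfs_drop_leaf_ref, but the extents to free come from a
 * cached btrfs_leaf_ref instead of the leaf itself.
 */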
2752 static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
2753                                         struct btrfs_root *root,
2754                                         struct btrfs_leaf_ref *ref)
2755 {
2756         int i;
2757         int ret;
2758         struct btrfs_extent_info *info = ref->extents;
2759
2760         for (i = 0; i < ref->nritems; i++) {
2761                 mutex_lock(&root->fs_info->alloc_mutex);
2762                 ret = __btrfs_free_extent(trans, root, info->bytenr,
2763                                           info->num_bytes, ref->bytenr,
2764                                           ref->owner, ref->generation,
2765                                           info->objectid, 0);
2766                 mutex_unlock(&root->fs_info->alloc_mutex);
2767
2768                 atomic_inc(&root->fs_info->throttle_gen);
2769                 wake_up(&root->fs_info->transaction_throttle);
2770                 cond_resched();
2771
2772                 BUG_ON(ret);
2773                 info++;
2774         }
2775
2776         return 0;
2777 }
2778
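/*
 * helper for the snapshot dropping code: look up the current reference
 * count on an extent.
 */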
2779 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2780                               u32 *refs)
2781 {
2782         int ret;
2783
2784         ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
2785         BUG_ON(ret);
2786
2787 #if 0 // some debugging code in case we see problems here
2788         /* if the ref count is one, it won't get increased again.  But
2789          * if the ref count is > 1, someone may be decreasing it at
2790          * the same time we are.
2791          */
2792         if (*refs != 1) {
2793                 struct extent_buffer *eb = NULL;
2794                 eb = btrfs_find_create_tree_block(root, start, len);
2795                 if (eb)
2796                         btrfs_tree_lock(eb);
2797
2798                 mutex_lock(&root->fs_info->alloc_mutex);
2799                 ret = lookup_extent_ref(NULL, root, start, len, refs);
2800                 BUG_ON(ret);
2801                 mutex_unlock(&root->fs_info->alloc_mutex);
2802
2803                 if (eb) {
2804                         btrfs_tree_unlock(eb);
2805                         free_extent_buffer(eb);
2806                 }
2807                 if (*refs == 1) {
2808                         printk("block %llu went down to one during drop_snap\n",
2809                                (unsigned long long)start);
2810                 }
2811
2812         }
2813 #endif
2814
2815         cond_resched();
2816         return ret;
2817 }
2818
2819 /*
2820  * helper function for drop_snapshot; this walks down the tree dropping ref
2821  * counts as it goes.
2822  */
2823 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2824                                    struct btrfs_root *root,
2825                                    struct btrfs_path *path, int *level)
2826 {
2827         u64 root_owner;
2828         u64 root_gen;
2829         u64 bytenr;
2830         u64 ptr_gen;
2831         struct extent_buffer *next;
2832         struct extent_buffer *cur;
2833         struct extent_buffer *parent;
2834         struct btrfs_leaf_ref *ref;
2835         u32 blocksize;
2836         int ret;
2837         u32 refs;
2838
2839         WARN_ON(*level < 0);
2840         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2841         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
2842                                 path->nodes[*level]->len, &refs);
2843         BUG_ON(ret);
2844         if (refs > 1)
2845                 goto out;
2846
2847         /*
2848          * walk down to the last node level and free all the leaves
2849          */
2850         while(*level >= 0) {
2851                 WARN_ON(*level < 0);
2852                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2853                 cur = path->nodes[*level];
2854
2855                 if (btrfs_header_level(cur) != *level)
2856                         WARN_ON(1);
2857
2858                 if (path->slots[*level] >=
2859                     btrfs_header_nritems(cur))
2860                         break;
2861                 if (*level == 0) {
2862                         ret = btrfs_drop_leaf_ref(trans, root, cur);
2863                         BUG_ON(ret);
2864                         break;
2865                 }
2866                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2867                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2868                 blocksize = btrfs_level_size(root, *level - 1);
2869
2870                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
2871                 BUG_ON(ret);
2872                 if (refs != 1) {
2873                         parent = path->nodes[*level];
2874                         root_owner = btrfs_header_owner(parent);
2875                         root_gen = btrfs_header_generation(parent);
2876                         path->slots[*level]++;
2877
2878                         mutex_lock(&root->fs_info->alloc_mutex);
2879                         ret = __btrfs_free_extent(trans, root, bytenr,
2880                                                 blocksize, parent->start,
2881                                                 root_owner, root_gen,
2882                                                 *level - 1, 1);
2883                         BUG_ON(ret);
2884                         mutex_unlock(&root->fs_info->alloc_mutex);
2885
2886                         atomic_inc(&root->fs_info->throttle_gen);
2887                         wake_up(&root->fs_info->transaction_throttle);
2888                         cond_resched();
2889
2890                         continue;
2891                 }
2892                 /*
2893                  * at this point, we have a single ref, and since the
2894                  * only place referencing this extent is a dead root,
2895                  * the reference count should never go higher.
2896                  * So, we don't need to check it again.
2897                  */
2898                 if (*level == 1) {
2899                         ref = btrfs_lookup_leaf_ref(root, bytenr);
2900                         if (ref && ref->generation != ptr_gen) {
2901                                 btrfs_free_leaf_ref(root, ref);
2902                                 ref = NULL;
2903                         }
2904                         if (ref) {
2905                                 ret = cache_drop_leaf_ref(trans, root, ref);
2906                                 BUG_ON(ret);
2907                                 btrfs_remove_leaf_ref(root, ref);
2908                                 btrfs_free_leaf_ref(root, ref);
2909                                 *level = 0;
2910                                 break;
2911                         }
2912                         if (printk_ratelimit()) {
2913                                 printk("leaf ref miss for bytenr %llu\n",
2914                                        (unsigned long long)bytenr);
2915                         }
2916                 }
2917                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2918                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2919                         free_extent_buffer(next);
2920
2921                         next = read_tree_block(root, bytenr, blocksize,
2922                                                ptr_gen);
2923                         cond_resched();
2924 #if 0
2925                         /*
2926                          * this is a debugging check and can go away;
2927                          * the ref should never go all the way down to 1
2928                          * at this point
2929                          */
2930                         ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2931                                                 &refs);
2932                         BUG_ON(ret);
2933                         WARN_ON(refs != 1);
2934 #endif
2935                 }
2936                 WARN_ON(*level <= 0);
2937                 if (path->nodes[*level-1])
2938                         free_extent_buffer(path->nodes[*level-1]);
2939                 path->nodes[*level-1] = next;
2940                 *level = btrfs_header_level(next);
2941                 path->slots[*level] = 0;
2942                 cond_resched();
2943         }
2944 out:
2945         WARN_ON(*level < 0);
2946         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2947
2948         if (path->nodes[*level] == root->node) {
2949                 parent = path->nodes[*level];
2950                 bytenr = path->nodes[*level]->start;
2951         } else {
2952                 parent = path->nodes[*level + 1];
2953                 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
2954         }
2955
2956         blocksize = btrfs_level_size(root, *level);
2957         root_owner = btrfs_header_owner(parent);
2958         root_gen = btrfs_header_generation(parent);
2959
2960         mutex_lock(&root->fs_info->alloc_mutex);
2961         ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
2962                                   parent->start, root_owner, root_gen,
2963                                   *level, 1);
2964         mutex_unlock(&root->fs_info->alloc_mutex);
2965         free_extent_buffer(path->nodes[*level]);
2966         path->nodes[*level] = NULL;
2967         *level += 1;
2968         BUG_ON(ret);
2969
2970         cond_resched();
2971         return 0;
2972 }
2973
2974 /*
2975  * helper function for drop_subtree, this function is similar to
2976  * walk_down_tree. The main difference is that it checks reference
2977  * counts while tree blocks are locked.
2978  */
2979 static int noinline walk_down_subtree(struct btrfs_trans_handle *trans,
2980                                       struct btrfs_root *root,
2981                                       struct btrfs_path *path, int *level)
2982 {
2983         struct extent_buffer *next;
2984         struct extent_buffer *cur;
2985         struct extent_buffer *parent;
2986         u64 bytenr;
2987         u64 ptr_gen;
2988         u32 blocksize;
2989         u32 refs;
2990         int ret;
2991
2992         cur = path->nodes[*level];
2993         ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
2994                                       &refs);
2995         BUG_ON(ret);
2996         if (refs > 1)
2997                 goto out;
2998
2999         while (*level >= 0) {
3000                 cur = path->nodes[*level];
3001                 if (*level == 0) {
3002                         ret = btrfs_drop_leaf_ref(trans, root, cur);
3003                         BUG_ON(ret);
3004                         clean_tree_block(trans, root, cur);
3005                         break;
3006                 }
3007                 if (path->slots[*level] >= btrfs_header_nritems(cur)) {
3008                         clean_tree_block(trans, root, cur);
3009                         break;
3010                 }
3011
3012                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
3013                 blocksize = btrfs_level_size(root, *level - 1);
3014                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
3015
3016                 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
3017                 btrfs_tree_lock(next);
3018
3019                 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
3020                                               &refs);
3021                 BUG_ON(ret);
3022                 if (refs > 1) {
3023                         parent = path->nodes[*level];
3024                         ret = btrfs_free_extent(trans, root, bytenr,
3025                                         blocksize, parent->start,
3026                                         btrfs_header_owner(parent),
3027                                         btrfs_header_generation(parent),
3028                                         *level - 1, 1);
3029                         BUG_ON(ret);
3030                         path->slots[*level]++;
3031                         btrfs_tree_unlock(next);
3032                         free_extent_buffer(next);
3033                         continue;
3034                 }
3035
3036                 *level = btrfs_header_level(next);
3037                 path->nodes[*level] = next;
3038                 path->slots[*level] = 0;
3039                 path->locks[*level] = 1;
3040                 cond_resched();
3041         }
3042 out:
3043         parent = path->nodes[*level + 1];
3044         bytenr = path->nodes[*level]->start;
3045         blocksize = path->nodes[*level]->len;
3046
3047         ret = btrfs_free_extent(trans, root, bytenr, blocksize,
3048                         parent->start, btrfs_header_owner(parent),
3049                         btrfs_header_generation(parent), *level, 1);
3050         BUG_ON(ret);
3051
3052         if (path->locks[*level]) {
3053                 btrfs_tree_unlock(path->nodes[*level]);
3054                 path->locks[*level] = 0;
3055         }
3056         free_extent_buffer(path->nodes[*level]);
3057         path->nodes[*level] = NULL;
3058         *level += 1;
3059         cond_resched();
3060         return 0;
3061 }
3062
3063 /*
3064  * helper for dropping snapshots.  This walks back up the tree in the path
3065  * to find the first node higher up where we haven't yet gone through
3066  * all the slots
3067  */
3068 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3069                                  struct btrfs_root *root,
3070                                  struct btrfs_path *path,
3071                                  int *level, int max_level)
3072 {
3073         u64 root_owner;
3074         u64 root_gen;
3075         struct btrfs_root_item *root_item = &root->root_item;
3076         int i;
3077         int slot;
3078         int ret;
3079
3080         for (i = *level; i < max_level && path->nodes[i]; i++) {
3081                 slot = path->slots[i];
3082                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3083                         struct extent_buffer *node;
3084                         struct btrfs_disk_key disk_key;
3085                         node = path->nodes[i];
3086                         path->slots[i]++;
3087                         *level = i;
3088                         WARN_ON(*level == 0);
3089                         btrfs_node_key(node, &disk_key, path->slots[i]);
3090                         memcpy(&root_item->drop_progress,
3091                                &disk_key, sizeof(disk_key));
3092                         root_item->drop_level = i;
3093                         return 0;
3094                 } else {
3095                         struct extent_buffer *parent;
3096                         if (path->nodes[*level] == root->node)
3097                                 parent = path->nodes[*level];
3098                         else
3099                                 parent = path->nodes[*level + 1];
3100
3101                         root_owner = btrfs_header_owner(parent);
3102                         root_gen = btrfs_header_generation(parent);
3103
3104                         clean_tree_block(trans, root, path->nodes[*level]);
3105                         ret = btrfs_free_extent(trans, root,
3106                                                 path->nodes[*level]->start,
3107                                                 path->nodes[*level]->len,
3108                                                 parent->start, root_owner,
3109                                                 root_gen, *level, 1);
3110                         BUG_ON(ret);
3111                         if (path->locks[*level]) {
3112                                 btrfs_tree_unlock(path->nodes[*level]);
3113                                 path->locks[*level] = 0;
3114                         }
3115                         free_extent_buffer(path->nodes[*level]);
3116                         path->nodes[*level] = NULL;
3117                         *level = i + 1;
3118                 }
3119         }
3120         return 1;
3121 }
3122
3123 /*
3124  * drop the reference count on the tree rooted at 'root'.  This traverses
3125  * the tree freeing any blocks that have a ref count of zero after being
3126  * decremented.
3127  */
3128 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3129                         *root)
3130 {
3131         int ret = 0;
3132         int wret;
3133         int level;
3134         struct btrfs_path *path;
3135         int i;
3136         int orig_level;
3137         struct btrfs_root_item *root_item = &root->root_item;
3138
3139         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3140         path = btrfs_alloc_path();
3141         BUG_ON(!path);
3142
3143         level = btrfs_header_level(root->node);
3144         orig_level = level;
3145         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3146                 path->nodes[level] = root->node;
3147                 extent_buffer_get(root->node);
3148                 path->slots[level] = 0;
3149         } else {
3150                 struct btrfs_key key;
3151                 struct btrfs_disk_key found_key;
3152                 struct extent_buffer *node;
3153
3154                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3155                 level = root_item->drop_level;
3156                 path->lowest_level = level;
3157                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3158                 if (wret < 0) {
3159                         ret = wret;
3160                         goto out;
3161                 }
3162                 node = path->nodes[level];
3163                 btrfs_node_key(node, &found_key, path->slots[level]);
3164                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3165                                sizeof(found_key)));
3166                 /*
3167                  * unlock our path; this is safe because only this
3168                  * function is allowed to delete this snapshot
3169                  */
3170                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
3171                         if (path->nodes[i] && path->locks[i]) {
3172                                 path->locks[i] = 0;
3173                                 btrfs_tree_unlock(path->nodes[i]);
3174                         }
3175                 }
3176         }
3177         while(1) {
3178                 wret = walk_down_tree(trans, root, path, &level);
3179                 if (wret > 0)
3180                         break;
3181                 if (wret < 0)
3182                         ret = wret;
3183
3184                 wret = walk_up_tree(trans, root, path, &level,
3185                                     BTRFS_MAX_LEVEL);
3186                 if (wret > 0)
3187                         break;
3188                 if (wret < 0)
3189                         ret = wret;
3190                 if (trans->transaction->in_commit) {
3191                         ret = -EAGAIN;
3192                         break;
3193                 }
3194                 atomic_inc(&root->fs_info->throttle_gen);
3195                 wake_up(&root->fs_info->transaction_throttle);
3196         }
3197         for (i = 0; i <= orig_level; i++) {
3198                 if (path->nodes[i]) {
3199                         free_extent_buffer(path->nodes[i]);
3200                         path->nodes[i] = NULL;
3201                 }
3202         }
3203 out:
3204         btrfs_free_path(path);
3205         return ret;
3206 }
3207
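/*
 * drop the references held by the subtree rooted at 'node'.  Both 'node'
 * and its 'parent' must be locked by the caller.
 */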
3208 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3209                         struct btrfs_root *root,
3210                         struct extent_buffer *node,
3211                         struct extent_buffer *parent)
3212 {
3213         struct btrfs_path *path;
3214         int level;
3215         int parent_level;
3216         int ret = 0;
3217         int wret;
3218
3219         path = btrfs_alloc_path();
3220         BUG_ON(!path);
3221
3222         BUG_ON(!btrfs_tree_locked(parent));
3223         parent_level = btrfs_header_level(parent);
3224         extent_buffer_get(parent);
3225         path->nodes[parent_level] = parent;
3226         path->slots[parent_level] = btrfs_header_nritems(parent);
3227
3228         BUG_ON(!btrfs_tree_locked(node));
3229         level = btrfs_header_level(node);
3230         extent_buffer_get(node);
3231         path->nodes[level] = node;
3232         path->slots[level] = 0;
3233
3234         while (1) {
3235                 wret = walk_down_subtree(trans, root, path, &level);
3236                 if (wret < 0)
3237                         ret = wret;
3238                 if (wret != 0)
3239                         break;
3240
3241                 wret = walk_up_tree(trans, root, path, &level, parent_level);
3242                 if (wret < 0)
3243                         ret = wret;
3244                 if (wret != 0)
3245                         break;
3246         }
3247
3248         btrfs_free_path(path);
3249         return ret;
3250 }
3251
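/*
 * readahead helper: clamp a window of 'nr' pages starting at 'start' so
 * that it does not run past 'last'.
 */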
3252 static unsigned long calc_ra(unsigned long start, unsigned long last,
3253                              unsigned long nr)
3254 {
3255         return min(last, start + nr - 1);
3256 }
3257
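/*
 * read in and dirty every page covering [start, start + len) in the
 * relocation inode, waiting on any ordered extents in the range, so
 * that writeback will send the data to its new location.
 */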
3258 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
3259                                          u64 len)
3260 {
3261         u64 page_start;
3262         u64 page_end;
3263         unsigned long first_index;
3264         unsigned long last_index;
3265         unsigned long i;
3266         struct page *page;
3267         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3268         struct file_ra_state *ra;
3269         struct btrfs_ordered_extent *ordered;
3270         unsigned int total_read = 0;
3271         unsigned int total_dirty = 0;
3272         int ret = 0;
3273
3274         ra = kzalloc(sizeof(*ra), GFP_NOFS);
3275
3276         mutex_lock(&inode->i_mutex);
3277         first_index = start >> PAGE_CACHE_SHIFT;
3278         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3279
3280         /* make sure the dirty trick played by the caller works */
3281         ret = invalidate_inode_pages2_range(inode->i_mapping,
3282                                             first_index, last_index);
3283         if (ret)
3284                 goto out_unlock;
3285
3286         file_ra_state_init(ra, inode->i_mapping);
3287
3288         for (i = first_index ; i <= last_index; i++) {
3289                 if (total_read % ra->ra_pages == 0) {
3290                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3291                                        calc_ra(i, last_index, ra->ra_pages));
3292                 }
3293                 total_read++;
3294 again:
3295                 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
3296                         BUG_ON(1);
3297                 page = grab_cache_page(inode->i_mapping, i);
3298                 if (!page) {
3299                         ret = -ENOMEM;
3300                         goto out_unlock;
3301                 }
3302                 if (!PageUptodate(page)) {
3303                         btrfs_readpage(NULL, page);
3304                         lock_page(page);
3305                         if (!PageUptodate(page)) {
3306                                 unlock_page(page);
3307                                 page_cache_release(page);
3308                                 ret = -EIO;
3309                                 goto out_unlock;
3310                         }
3311                 }
3312                 wait_on_page_writeback(page);
3313
3314                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3315                 page_end = page_start + PAGE_CACHE_SIZE - 1;
3316                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3317
3318                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3319                 if (ordered) {
3320                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3321                         unlock_page(page);
3322                         page_cache_release(page);
3323                         btrfs_start_ordered_extent(inode, ordered, 1);
3324                         btrfs_put_ordered_extent(ordered);
3325                         goto again;
3326                 }
3327                 set_page_extent_mapped(page);
3328
3329                 btrfs_set_extent_delalloc(inode, page_start, page_end);
3330                 if (i == first_index)
3331                         set_extent_bits(io_tree, page_start, page_end,
3332                                         EXTENT_BOUNDARY, GFP_NOFS);
3333
3334                 set_page_dirty(page);
3335                 total_dirty++;
3336
3337                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3338                 unlock_page(page);
3339                 page_cache_release(page);
3340         }
3341
3342 out_unlock:
3343         kfree(ra);
3344         mutex_unlock(&inode->i_mutex);
3345         balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
3346         return ret;
3347 }
3348
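/*
 * set up a pinned extent mapping for the extent being relocated and
 * then pull its pages into the relocation inode via
 * relocate_inode_pages().
 */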
3349 static int noinline relocate_data_extent(struct inode *reloc_inode,
3350                                          struct btrfs_key *extent_key,
3351                                          u64 offset)
3352 {
3353         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
3354         struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
3355         struct extent_map *em;
3356
3357         em = alloc_extent_map(GFP_NOFS);
3358         BUG_ON(!em || IS_ERR(em));
3359
3360         em->start = extent_key->objectid - offset;
3361         em->len = extent_key->offset;
3362         em->block_len = extent_key->offset;
3363         em->block_start = extent_key->objectid;
3364         em->bdev = root->fs_info->fs_devices->latest_bdev;
3365         set_bit(EXTENT_FLAG_PINNED, &em->flags);
3366
3367         /* set up an extent map to cheat btrfs_readpage */
3368         mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex);
3369         while (1) {
3370                 int ret;
3371                 spin_lock(&em_tree->lock);
3372                 ret = add_extent_mapping(em_tree, em);
3373                 spin_unlock(&em_tree->lock);
3374                 if (ret != -EEXIST) {
3375                         free_extent_map(em);
3376                         break;
3377                 }
3378                 btrfs_drop_extent_cache(reloc_inode, em->start,
3379                                         em->start + em->len - 1, 0);
3380         }
3381         mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex);
3382
3383         return relocate_inode_pages(reloc_inode, extent_key->objectid - offset,
3384                                     extent_key->offset);
3385 }
3386
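/*
 * a btrfs_ref_path describes one chain of back references from an
 * extent up towards its tree root, recording one tree block bytenr per
 * level along with the root that owns the chain.
 */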
3387 struct btrfs_ref_path {
3388         u64 extent_start;
3389         u64 nodes[BTRFS_MAX_LEVEL];
3390         u64 root_objectid;
3391         u64 root_generation;
3392         u64 owner_objectid;
3393         u32 num_refs;
3394         int lowest_level;
3395         int current_level;
3396         int shared_level;
3397
3398         struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
3399         u64 new_nodes[BTRFS_MAX_LEVEL];
3400 };
3401
3402 struct disk_extent {
3403         u64 ram_bytes;
3404         u64 disk_bytenr;
3405         u64 disk_num_bytes;
3406         u64 offset;
3407         u64 num_bytes;
3408         u8 compression;
3409         u8 encryption;
3410         u16 other_encoding;
3411 };
3412
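/*
 * returns 1 if the objectid belongs to one of the internal cow-only
 * trees (root, extent, chunk, dev or tree log tree), 0 otherwise.
 */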
3413 static int is_cowonly_root(u64 root_objectid)
3414 {
3415         if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
3416             root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
3417             root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
3418             root_objectid == BTRFS_DEV_TREE_OBJECTID ||
3419             root_objectid == BTRFS_TREE_LOG_OBJECTID)
3420                 return 1;
3421         return 0;
3422 }
3423
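/*
 * core of the reference path walking.  When first_time is set this
 * builds the initial chain of back references for
 * ref_path->extent_start by walking up through BTRFS_EXTENT_REF_KEY
 * items; later calls step back down and find the next chain.  Returns
 * 0 when a path was found, 1 when there are no more paths, and a
 * negative errno on failure.
 */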
3424 static int noinline __next_ref_path(struct btrfs_trans_handle *trans,
3425                                     struct btrfs_root *extent_root,
3426                                     struct btrfs_ref_path *ref_path,
3427                                     int first_time)
3428 {
3429         struct extent_buffer *leaf;
3430         struct btrfs_path *path;
3431         struct btrfs_extent_ref *ref;
3432         struct btrfs_key key;
3433         struct btrfs_key found_key;
3434         u64 bytenr;
3435         u32 nritems;
3436         int level;
3437         int ret = 1;
3438
3439         path = btrfs_alloc_path();
3440         if (!path)
3441                 return -ENOMEM;
3442
3443         mutex_lock(&extent_root->fs_info->alloc_mutex);
3444
3445         if (first_time) {
3446                 ref_path->lowest_level = -1;
3447                 ref_path->current_level = -1;
3448                 ref_path->shared_level = -1;
3449                 goto walk_up;
3450         }
3451 walk_down:
3452         level = ref_path->current_level - 1;
3453         while (level >= -1) {
3454                 u64 parent;
3455                 if (level < ref_path->lowest_level)
3456                         break;
3457
3458                 if (level >= 0) {
3459                         bytenr = ref_path->nodes[level];
3460                 } else {
3461                         bytenr = ref_path->extent_start;
3462                 }
3463                 BUG_ON(bytenr == 0);
3464
3465                 parent = ref_path->nodes[level + 1];
3466                 ref_path->nodes[level + 1] = 0;
3467                 ref_path->current_level = level;
3468                 BUG_ON(parent == 0);
3469
3470                 key.objectid = bytenr;
3471                 key.offset = parent + 1;
3472                 key.type = BTRFS_EXTENT_REF_KEY;
3473
3474                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
3475                 if (ret < 0)
3476                         goto out;
3477                 BUG_ON(ret == 0);
3478
3479                 leaf = path->nodes[0];
3480                 nritems = btrfs_header_nritems(leaf);
3481                 if (path->slots[0] >= nritems) {
3482                         ret = btrfs_next_leaf(extent_root, path);
3483                         if (ret < 0)
3484                                 goto out;
3485                         if (ret > 0)
3486                                 goto next;
3487                         leaf = path->nodes[0];
3488                 }
3489
3490                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3491                 if (found_key.objectid == bytenr &&
3492                     found_key.type == BTRFS_EXTENT_REF_KEY) {
3493                         if (level < ref_path->shared_level)
3494                                 ref_path->shared_level = level;
3495                         goto found;
3496                 }
3497 next:
3498                 level--;
3499                 btrfs_release_path(extent_root, path);
3500                 if (need_resched()) {
3501                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
3502                         cond_resched();
3503                         mutex_lock(&extent_root->fs_info->alloc_mutex);
3504                 }
3505         }
3506         /* reached lowest level */
3507         ret = 1;
3508         goto out;
3509 walk_up:
3510         level = ref_path->current_level;
3511         while (level < BTRFS_MAX_LEVEL - 1) {
3512                 u64 ref_objectid;
3513                 if (level >= 0) {
3514                         bytenr = ref_path->nodes[level];
3515                 } else {
3516                         bytenr = ref_path->extent_start;
3517                 }
3518                 BUG_ON(bytenr == 0);
3519
3520                 key.objectid = bytenr;
3521                 key.offset = 0;
3522                 key.type = BTRFS_EXTENT_REF_KEY;
3523
3524                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
3525                 if (ret < 0)
3526                         goto out;
3527
3528                 leaf = path->nodes[0];
3529                 nritems = btrfs_header_nritems(leaf);
3530                 if (path->slots[0] >= nritems) {
3531                         ret = btrfs_next_leaf(extent_root, path);
3532                         if (ret < 0)
3533                                 goto out;
3534                         if (ret > 0) {
3535                                 /* the extent was freed by someone */
3536                                 if (ref_path->lowest_level == level)
3537                                         goto out;
3538                                 btrfs_release_path(extent_root, path);
3539                                 goto walk_down;
3540                         }
3541                         leaf = path->nodes[0];
3542                 }
3543
3544                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3545                 if (found_key.objectid != bytenr ||
3546                                 found_key.type != BTRFS_EXTENT_REF_KEY) {
3547                         /* the extent was freed by someone */
3548                         if (ref_path->lowest_level == level) {
3549                                 ret = 1;
3550                                 goto out;
3551                         }
3552                         btrfs_release_path(extent_root, path);
3553                         goto walk_down;
3554                 }
3555 found:
3556                 ref = btrfs_item_ptr(leaf, path->slots[0],
3557                                 struct btrfs_extent_ref);
3558                 ref_objectid = btrfs_ref_objectid(leaf, ref);
3559                 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
3560                         if (first_time) {
3561                                 level = (int)ref_objectid;
3562                                 BUG_ON(level >= BTRFS_MAX_LEVEL);
3563                                 ref_path->lowest_level = level;
3564                                 ref_path->current_level = level;
3565                                 ref_path->nodes[level] = bytenr;
3566                         } else {
3567                                 WARN_ON(ref_objectid != level);
3568                         }
3569                 } else {
3570                         WARN_ON(level != -1);
3571                 }
3572                 first_time = 0;
3573
3574                 if (ref_path->lowest_level == level) {
3575                         ref_path->owner_objectid = ref_objectid;
3576                         ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
3577                 }
3578
3579                 /*
3580                  * the block is a tree root or the block isn't in a
3581                  * reference counted tree.
3582                  */
3583                 if (found_key.objectid == found_key.offset ||
3584                     is_cowonly_root(btrfs_ref_root(leaf, ref))) {
3585                         ref_path->root_objectid = btrfs_ref_root(leaf, ref);
3586                         ref_path->root_generation =
3587                                 btrfs_ref_generation(leaf, ref);
3588                         if (level < 0) {
3589                                 /* special reference from the tree log */
3590                                 ref_path->nodes[0] = found_key.offset;
3591                                 ref_path->current_level = 0;
3592                         }
3593                         ret = 0;
3594                         goto out;
3595                 }
3596
3597                 level++;
3598                 BUG_ON(ref_path->nodes[level] != 0);
3599                 ref_path->nodes[level] = found_key.offset;
3600                 ref_path->current_level = level;
3601
3602                 /*
3603                  * the reference was created in the running transaction,
3604                  * no need to continue walking up.
3605                  */
3606                 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
3607                         ref_path->root_objectid = btrfs_ref_root(leaf, ref);
3608                         ref_path->root_generation =
3609                                 btrfs_ref_generation(leaf, ref);
3610                         ret = 0;
3611                         goto out;
3612                 }
3613
3614                 btrfs_release_path(extent_root, path);
3615                 if (need_resched()) {
3616                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
3617                         cond_resched();
3618                         mutex_lock(&extent_root->fs_info->alloc_mutex);
3619                 }
3620         }
3621         /* reached max tree level, but no tree root found. */
3622         BUG();
3623 out:
3624         mutex_unlock(&extent_root->fs_info->alloc_mutex);
3625         btrfs_free_path(path);
3626         return ret;
3627 }
3628
3629 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
3630                                 struct btrfs_root *extent_root,
3631                                 struct btrfs_ref_path *ref_path,
3632                                 u64 extent_start)
3633 {
3634         memset(ref_path, 0, sizeof(*ref_path));
3635         ref_path->extent_start = extent_start;
3636
3637         return __next_ref_path(trans, extent_root, ref_path, 1);
3638 }
3639
3640 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
3641                                struct btrfs_root *extent_root,
3642                                struct btrfs_ref_path *ref_path)
3643 {
3644         return __next_ref_path(trans, extent_root, ref_path, 0);
3645 }
3646
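/*
 * collect the file extents in the relocation inode that cover
 * 'extent_key' and return them through 'extents' as an array of
 * disk_extent.  When no_fragment is set, a return value of 1 means the
 * extent now lives in more than one new location.
 */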
3647 static int noinline get_new_locations(struct inode *reloc_inode,
3648                                       struct btrfs_key *extent_key,
3649                                       u64 offset, int no_fragment,
3650                                       struct disk_extent **extents,
3651                                       int *nr_extents)
3652 {
3653         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
3654         struct btrfs_path *path;
3655         struct btrfs_file_extent_item *fi;
3656         struct extent_buffer *leaf;
3657         struct disk_extent *exts = *extents;
3658         struct btrfs_key found_key;
3659         u64 cur_pos;
3660         u64 last_byte;
3661         u32 nritems;
3662         int nr = 0;
3663         int max = *nr_extents;
3664         int ret;
3665
3666         WARN_ON(!no_fragment && *extents);
3667         if (!exts) {
3668                 max = 1;
3669                 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
3670                 if (!exts)
3671                         return -ENOMEM;
3672         }
3673
3674         path = btrfs_alloc_path();
3675         BUG_ON(!path);
3676
3677         cur_pos = extent_key->objectid - offset;
3678         last_byte = extent_key->objectid + extent_key->offset;
3679         ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
3680                                        cur_pos, 0);
3681         if (ret < 0)
3682                 goto out;
3683         if (ret > 0) {
3684                 ret = -ENOENT;
3685                 goto out;
3686         }
3687
3688         while (1) {
3689                 leaf = path->nodes[0];
3690                 nritems = btrfs_header_nritems(leaf);
3691                 if (path->slots[0] >= nritems) {
3692                         ret = btrfs_next_leaf(root, path);
3693                         if (ret < 0)
3694                                 goto out;
3695                         if (ret > 0)
3696                                 break;
3697                         leaf = path->nodes[0];
3698                 }
3699
3700                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3701                 if (found_key.offset != cur_pos ||
3702                     found_key.type != BTRFS_EXTENT_DATA_KEY ||
3703                     found_key.objectid != reloc_inode->i_ino)
3704                         break;
3705
3706                 fi = btrfs_item_ptr(leaf, path->slots[0],
3707                                     struct btrfs_file_extent_item);
3708                 if (btrfs_file_extent_type(leaf, fi) !=
3709                     BTRFS_FILE_EXTENT_REG ||
3710                     btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
3711                         break;
3712
3713                 if (nr == max) {
3714                         struct disk_extent *old = exts;
3715                         max *= 2;
3716                         exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
3717                         memcpy(exts, old, sizeof(*exts) * nr);
3718                         if (old != *extents)
3719                                 kfree(old);
3720                 }
3721
3722                 exts[nr].disk_bytenr =
3723                         btrfs_file_extent_disk_bytenr(leaf, fi);
3724                 exts[nr].disk_num_bytes =
3725                         btrfs_file_extent_disk_num_bytes(leaf, fi);
3726                 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
3727                 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
3728                 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
3729                 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
3730                 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
3731                 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
3732                                                                            fi);
3733                 WARN_ON(exts[nr].offset > 0);
3734                 WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
3735
3736                 cur_pos += exts[nr].num_bytes;
3737                 nr++;
3738
3739                 if (cur_pos + offset >= last_byte)
3740                         break;
3741
3742                 if (no_fragment) {
3743                         ret = 1;
3744                         goto out;
3745                 }
3746                 path->slots[0]++;
3747         }
3748
3749         WARN_ON(cur_pos + offset > last_byte);
3750         if (cur_pos + offset < last_byte) {
3751                 ret = -ENOENT;
3752                 goto out;
3753         }
3754         ret = 0;
3755 out:
3756         btrfs_free_path(path);
3757         if (ret) {
3758                 if (exts != *extents)
3759                         kfree(exts);
3760         } else {
3761                 *extents = exts;
3762                 *nr_extents = nr;
3763         }
3764         return ret;
3765 }
3766
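/*
 * walk the file extent items that reference extent_key->objectid in the
 * tree described by ref_path and switch them over to the new extents:
 * the pointer is updated in place when there is a single replacement
 * extent, otherwise the old pointer is dropped and the new ones are
 * inserted one by one.
 */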
3767 static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
3768                                         struct btrfs_root *root,
3769                                         struct btrfs_path *path,
3770                                         struct btrfs_key *extent_key,
3771                                         struct btrfs_key *leaf_key,
3772                                         struct btrfs_ref_path *ref_path,
3773                                         struct disk_extent *new_extents,
3774                                         int nr_extents)
3775 {
3776         struct extent_buffer *leaf;
3777         struct btrfs_file_extent_item *fi;
3778         struct inode *inode = NULL;
3779         struct btrfs_key key;
3780         u64 lock_start = 0;
3781         u64 lock_end = 0;
3782         u64 num_bytes;
3783         u64 ext_offset;
3784         u64 first_pos;
3785         u32 nritems;
3786         int nr_scaned = 0;
3787         int extent_locked = 0;
3788         int ret;
3789
3790         memcpy(&key, leaf_key, sizeof(key));
3791         first_pos = INT_LIMIT(loff_t) - extent_key->offset;
3792         if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
3793                 if (key.objectid < ref_path->owner_objectid ||
3794                     (key.objectid == ref_path->owner_objectid &&
3795                      key.type < BTRFS_EXTENT_DATA_KEY)) {
3796                         key.objectid = ref_path->owner_objectid;
3797                         key.type = BTRFS_EXTENT_DATA_KEY;
3798                         key.offset = 0;
3799                 }
3800         }
3801
3802         while (1) {
3803                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3804                 if (ret < 0)
3805                         goto out;
3806
3807                 leaf = path->nodes[0];
3808                 nritems = btrfs_header_nritems(leaf);
3809 next:
3810                 if (extent_locked && ret > 0) {
3811                         /*
3812                          * the file extent item was modified by someone
3813                          * before the extent got locked.
3814                          */
3815                         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
3816                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3817                                       lock_end, GFP_NOFS);
3818                         extent_locked = 0;
3819                 }
3820
3821                 if (path->slots[0] >= nritems) {
3822                         if (++nr_scaned > 2)
3823                                 break;
3824
3825                         BUG_ON(extent_locked);
3826                         ret = btrfs_next_leaf(root, path);
3827                         if (ret < 0)
3828                                 goto out;
3829                         if (ret > 0)
3830                                 break;
3831                         leaf = path->nodes[0];
3832                         nritems = btrfs_header_nritems(leaf);
3833                 }
3834
3835                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3836
3837                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
3838                         if ((key.objectid > ref_path->owner_objectid) ||
3839                             (key.objectid == ref_path->owner_objectid &&
3840                              key.type > BTRFS_EXTENT_DATA_KEY) ||
3841                             (key.offset >= first_pos + extent_key->offset))
3842                                 break;
3843                 }
3844
3845                 if (inode && key.objectid != inode->i_ino) {
3846                         BUG_ON(extent_locked);
3847                         btrfs_release_path(root, path);
3848                         mutex_unlock(&inode->i_mutex);
3849                         iput(inode);
3850                         inode = NULL;
3851                         continue;
3852                 }
3853
3854                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
3855                         path->slots[0]++;
3856                         ret = 1;
3857                         goto next;
3858                 }
3859                 fi = btrfs_item_ptr(leaf, path->slots[0],
3860                                     struct btrfs_file_extent_item);
3861                 if ((btrfs_file_extent_type(leaf, fi) !=
3862                      BTRFS_FILE_EXTENT_REG) ||
3863                     (btrfs_file_extent_disk_bytenr(leaf, fi) !=
3864                      extent_key->objectid)) {
3865                         path->slots[0]++;
3866                         ret = 1;
3867                         goto next;
3868                 }
3869
3870                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
3871                 ext_offset = btrfs_file_extent_offset(leaf, fi);
3872
3873                 if (first_pos > key.offset - ext_offset)
3874                         first_pos = key.offset - ext_offset;
3875
3876                 if (!extent_locked) {
3877                         lock_start = key.offset;
3878                         lock_end = lock_start + num_bytes - 1;
3879                 } else {
3880                         BUG_ON(lock_start != key.offset);
3881                         BUG_ON(lock_end - lock_start + 1 < num_bytes);
3882                 }
3883
3884                 if (!inode) {
3885                         btrfs_release_path(root, path);
3886
3887                         inode = btrfs_iget_locked(root->fs_info->sb,
3888                                                   key.objectid, root);
3889                         if (inode->i_state & I_NEW) {
3890                                 BTRFS_I(inode)->root = root;
3891                                 BTRFS_I(inode)->location.objectid =
3892                                         key.objectid;
3893                                 BTRFS_I(inode)->location.type =
3894                                         BTRFS_INODE_ITEM_KEY;
3895                                 BTRFS_I(inode)->location.offset = 0;
3896                                 btrfs_read_locked_inode(inode);
3897                                 unlock_new_inode(inode);
3898                         }
3899                         /*
3900                          * some code calls btrfs_commit_transaction while
3901                          * holding the i_mutex, so we can't use mutex_lock
3902                          * here.
3903                          */
3904                         if (is_bad_inode(inode) ||
3905                             !mutex_trylock(&inode->i_mutex)) {
3906                                 iput(inode);
3907                                 inode = NULL;
3908                                 key.offset = (u64)-1;
3909                                 goto skip;
3910                         }
3911                 }
3912
3913                 if (!extent_locked) {
3914                         struct btrfs_ordered_extent *ordered;
3915
3916                         btrfs_release_path(root, path);
3917
3918                         lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3919                                     lock_end, GFP_NOFS);
3920                         ordered = btrfs_lookup_first_ordered_extent(inode,
3921                                                                     lock_end);
3922                         if (ordered &&
3923                             ordered->file_offset <= lock_end &&
3924                             ordered->file_offset + ordered->len > lock_start) {
3925                                 unlock_extent(&BTRFS_I(inode)->io_tree,
3926                                               lock_start, lock_end, GFP_NOFS);
3927                                 btrfs_start_ordered_extent(inode, ordered, 1);
3928                                 btrfs_put_ordered_extent(ordered);
3929                                 key.offset += num_bytes;
3930                                 goto skip;
3931                         }
3932                         if (ordered)
3933                                 btrfs_put_ordered_extent(ordered);
3934
3935                         mutex_lock(&BTRFS_I(inode)->extent_mutex);
3936                         extent_locked = 1;
3937                         continue;
3938                 }
3939
3940                 if (nr_extents == 1) {
3941                         /* update extent pointer in place */
3942                         btrfs_set_file_extent_generation(leaf, fi,
3943                                                 trans->transid);
3944                         btrfs_set_file_extent_disk_bytenr(leaf, fi,
3945                                                 new_extents[0].disk_bytenr);
3946                         btrfs_set_file_extent_disk_num_bytes(leaf, fi,
3947                                                 new_extents[0].disk_num_bytes);
3948                         btrfs_set_file_extent_ram_bytes(leaf, fi,
3949                                                 new_extents[0].ram_bytes);
3950                         ext_offset += new_extents[0].offset;
3951                         btrfs_set_file_extent_offset(leaf, fi, ext_offset);
3952                         btrfs_mark_buffer_dirty(leaf);
3953
3954                         btrfs_drop_extent_cache(inode, key.offset,
3955                                                 key.offset + num_bytes - 1, 0);
3956
3957                         ret = btrfs_inc_extent_ref(trans, root,
3958                                                 new_extents[0].disk_bytenr,
3959                                                 new_extents[0].disk_num_bytes,
3960                                                 leaf->start,
3961                                                 root->root_key.objectid,
3962                                                 trans->transid,
3963                                                 key.objectid);
3964                         BUG_ON(ret);
3965
3966                         ret = btrfs_free_extent(trans, root,
3967                                                 extent_key->objectid,
3968                                                 extent_key->offset,
3969                                                 leaf->start,
3970                                                 btrfs_header_owner(leaf),
3971                                                 btrfs_header_generation(leaf),
3972                                                 key.objectid, 0);
3973                         BUG_ON(ret);
3974
3975                         btrfs_release_path(root, path);
3976                         key.offset += num_bytes;
3977                 } else {
3978                         u64 alloc_hint;
3979                         u64 extent_len;
3980                         int i;
3981                         /*
3982                          * drop the old extent pointer first, then insert the
3983                          * new pointers one by one
3984                          */
3985                         btrfs_release_path(root, path);
3986                         ret = btrfs_drop_extents(trans, root, inode, key.offset,
3987                                                  key.offset + num_bytes,
3988                                                  key.offset, &alloc_hint);
3989                         BUG_ON(ret);
3990
3991                         for (i = 0; i < nr_extents; i++) {
3992                                 if (ext_offset >= new_extents[i].num_bytes) {
3993                                         ext_offset -= new_extents[i].num_bytes;
3994                                         continue;
3995                                 }
3996                                 extent_len = min(new_extents[i].num_bytes -
3997                                                  ext_offset, num_bytes);
3998
3999                                 ret = btrfs_insert_empty_item(trans, root,
4000                                                               path, &key,
4001                                                               sizeof(*fi));
4002                                 BUG_ON(ret);
4003
4004                                 leaf = path->nodes[0];
4005                                 fi = btrfs_item_ptr(leaf, path->slots[0],
4006                                                 struct btrfs_file_extent_item);
4007                                 btrfs_set_file_extent_generation(leaf, fi,
4008                                                         trans->transid);
4009                                 btrfs_set_file_extent_type(leaf, fi,
4010                                                         BTRFS_FILE_EXTENT_REG);
4011                                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4012                                                 new_extents[i].disk_bytenr);
4013                                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4014                                                 new_extents[i].disk_num_bytes);
4015                                 btrfs_set_file_extent_ram_bytes(leaf, fi,
4016                                                 new_extents[i].ram_bytes);
4017
4018                                 btrfs_set_file_extent_compression(leaf, fi,
4019                                                 new_extents[i].compression);
4020                                 btrfs_set_file_extent_encryption(leaf, fi,
4021                                                 new_extents[i].encryption);
4022                                 btrfs_set_file_extent_other_encoding(leaf, fi,
4023                                                 new_extents[i].other_encoding);
4024
4025                                 btrfs_set_file_extent_num_bytes(leaf, fi,
4026                                                         extent_len);
4027                                 ext_offset += new_extents[i].offset;
4028                                 btrfs_set_file_extent_offset(leaf, fi,
4029                                                         ext_offset);
4030                                 btrfs_mark_buffer_dirty(leaf);
4031
4032                                 btrfs_drop_extent_cache(inode, key.offset,
4033                                                 key.offset + extent_len - 1, 0);
4034
4035                                 ret = btrfs_inc_extent_ref(trans, root,
4036                                                 new_extents[i].disk_bytenr,
4037                                                 new_extents[i].disk_num_bytes,
4038                                                 leaf->start,
4039                                                 root->root_key.objectid,
4040                                                 trans->transid, key.objectid);
4041                                 BUG_ON(ret);
4042                                 btrfs_release_path(root, path);
4043
4044                                 inode_add_bytes(inode, extent_len);
4045
4046                                 ext_offset = 0;
4047                                 num_bytes -= extent_len;
4048                                 key.offset += extent_len;
4049
4050                                 if (num_bytes == 0)
4051                                         break;
4052                         }
4053                         BUG_ON(i >= nr_extents);
4054                 }
4055
4056                 if (extent_locked) {
4057                         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
4058                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4059                                       lock_end, GFP_NOFS);
4060                         extent_locked = 0;
4061                 }
4062 skip:
4063                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
4064                     key.offset >= first_pos + extent_key->offset)
4065                         break;
4066
4067                 cond_resched();
4068         }
4069         ret = 0;
4070 out:
4071         btrfs_release_path(root, path);
4072         if (inode) {
4073                 mutex_unlock(&inode->i_mutex);
4074                 if (extent_locked) {
4075                         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
4076                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4077                                       lock_end, GFP_NOFS);
4078                 }
4079                 iput(inode);
4080         }
4081         return ret;
4082 }
4083
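/*
 * Called when a leaf in the tree reloc root has just been COWed: copy the
 * cached leaf ref of the original block at orig_start over to the new
 * buffer so the ref cache keeps tracking the extents this leaf points to.
 * Non-leaf blocks need no work here.
 */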
4084 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
4085                                struct btrfs_root *root,
4086                                struct extent_buffer *buf, u64 orig_start)
4087 {
4088         int level;
4089         int ret;
4090
4091         BUG_ON(btrfs_header_generation(buf) != trans->transid);
4092         BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
4093
4094         level = btrfs_header_level(buf);
4095         if (level == 0) {
4096                 struct btrfs_leaf_ref *ref;
4097                 struct btrfs_leaf_ref *orig_ref;
4098
4099                 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
4100                 if (!orig_ref)
4101                         return -ENOENT;
4102
4103                 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
4104                 if (!ref) {
4105                         btrfs_free_leaf_ref(root, orig_ref);
4106                         return -ENOMEM;
4107                 }
4108
4109                 ref->nritems = orig_ref->nritems;
4110                 memcpy(ref->extents, orig_ref->extents,
4111                         sizeof(ref->extents[0]) * ref->nritems);
4112
4113                 btrfs_free_leaf_ref(root, orig_ref);
4114
4115                 ref->root_gen = trans->transid;
4116                 ref->bytenr = buf->start;
4117                 ref->owner = btrfs_header_owner(buf);
4118                 ref->generation = btrfs_header_generation(buf);
4119                 ret = btrfs_add_leaf_ref(root, ref, 0);
4120                 WARN_ON(ret);
4121                 btrfs_free_leaf_ref(root, ref);
4122         }
4123         return 0;
4124 }
4125
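/*
 * Drop the cached extent mappings in target_root's inodes for every file
 * extent item in this leaf, so later lookups reread the relocated on-disk
 * pointers instead of using stale cached extents.
 */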
4126 static int noinline invalidate_extent_cache(struct btrfs_root *root,
4127                                         struct extent_buffer *leaf,
4128                                         struct btrfs_block_group_cache *group,
4129                                         struct btrfs_root *target_root)
4130 {
4131         struct btrfs_key key;
4132         struct inode *inode = NULL;
4133         struct btrfs_file_extent_item *fi;
4134         u64 num_bytes;
4135         u64 skip_objectid = 0;
4136         u32 nritems;
4137         u32 i;
4138
4139         nritems = btrfs_header_nritems(leaf);
4140         for (i = 0; i < nritems; i++) {
4141                 btrfs_item_key_to_cpu(leaf, &key, i);
4142                 if (key.objectid == skip_objectid ||
4143                     key.type != BTRFS_EXTENT_DATA_KEY)
4144                         continue;
4145                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4146                 if (btrfs_file_extent_type(leaf, fi) ==
4147                     BTRFS_FILE_EXTENT_INLINE)
4148                         continue;
4149                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4150                         continue;
4151                 if (!inode || inode->i_ino != key.objectid) {
4152                         iput(inode);
4153                         inode = btrfs_ilookup(target_root->fs_info->sb,
4154                                               key.objectid, target_root, 1);
4155                 }
4156                 if (!inode) {
4157                         skip_objectid = key.objectid;
4158                         continue;
4159                 }
4160                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4161
4162                 lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4163                             key.offset + num_bytes - 1, GFP_NOFS);
4164                 mutex_lock(&BTRFS_I(inode)->extent_mutex);
4165                 btrfs_drop_extent_cache(inode, key.offset,
4166                                         key.offset + num_bytes - 1, 1);
4167                 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
4168                 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4169                               key.offset + num_bytes - 1, GFP_NOFS);
4170                 cond_resched();
4171         }
4172         iput(inode);
4173         return 0;
4174 }
4175
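/*
 * Rewrite the file extent items in a reloc tree leaf: for each extent that
 * falls inside the block group being relocated, look up its new location
 * through the reloc inode, point the item (and the cached leaf ref) at the
 * new disk extent, and move the extent references from the old location to
 * the new one.
 */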
4176 static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
4177                                         struct btrfs_root *root,
4178                                         struct extent_buffer *leaf,
4179                                         struct btrfs_block_group_cache *group,
4180                                         struct inode *reloc_inode)
4181 {
4182         struct btrfs_key key;
4183         struct btrfs_key extent_key;
4184         struct btrfs_file_extent_item *fi;
4185         struct btrfs_leaf_ref *ref;
4186         struct disk_extent *new_extent;
4187         u64 bytenr;
4188         u64 num_bytes;
4189         u32 nritems;
4190         u32 i;
4191         int ext_index;
4192         int nr_extent;
4193         int ret;
4194
4195         new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
4196         BUG_ON(!new_extent);
4197
4198         ref = btrfs_lookup_leaf_ref(root, leaf->start);
4199         BUG_ON(!ref);
4200
4201         ext_index = -1;
4202         nritems = btrfs_header_nritems(leaf);
4203         for (i = 0; i < nritems; i++) {
4204                 btrfs_item_key_to_cpu(leaf, &key, i);
4205                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
4206                         continue;
4207                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4208                 if (btrfs_file_extent_type(leaf, fi) ==
4209                     BTRFS_FILE_EXTENT_INLINE)
4210                         continue;
4211                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4212                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4213                 if (bytenr == 0)
4214                         continue;
4215
4216                 ext_index++;
4217                 if (bytenr >= group->key.objectid + group->key.offset ||
4218                     bytenr + num_bytes <= group->key.objectid)
4219                         continue;
4220
4221                 extent_key.objectid = bytenr;
4222                 extent_key.offset = num_bytes;
4223                 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4224                 nr_extent = 1;
4225                 ret = get_new_locations(reloc_inode, &extent_key,
4226                                         group->key.objectid, 1,
4227                                         &new_extent, &nr_extent);
4228                 if (ret > 0)
4229                         continue;
4230                 BUG_ON(ret < 0);
4231
4232                 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
4233                 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
4234                 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
4235                 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
4236
4237                 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
4238                 btrfs_set_file_extent_ram_bytes(leaf, fi,
4239                                                 new_extent->ram_bytes);
4240                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4241                                                 new_extent->disk_bytenr);
4242                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4243                                                 new_extent->disk_num_bytes);
4244                 new_extent->offset += btrfs_file_extent_offset(leaf, fi);
4245                 btrfs_set_file_extent_offset(leaf, fi, new_extent->offset);
4246                 btrfs_mark_buffer_dirty(leaf);
4247
4248                 ret = btrfs_inc_extent_ref(trans, root,
4249                                         new_extent->disk_bytenr,
4250                                         new_extent->disk_num_bytes,
4251                                         leaf->start,
4252                                         root->root_key.objectid,
4253                                         trans->transid, key.objectid);
4254                 BUG_ON(ret);
4255                 ret = btrfs_free_extent(trans, root,
4256                                         bytenr, num_bytes, leaf->start,
4257                                         btrfs_header_owner(leaf),
4258                                         btrfs_header_generation(leaf),
4259                                         key.objectid, 0);
4260                 BUG_ON(ret);
4261                 cond_resched();
4262         }
4263         kfree(new_extent);
4264         BUG_ON(ext_index + 1 != ref->nritems);
4265         btrfs_free_leaf_ref(root, ref);
4266         return 0;
4267 }
4268
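/*
 * Detach a subvol's reloc tree and queue it on the dead_reloc_roots list.
 * The reloc root item is updated to record the tree's current node so the
 * tree can be dropped later by btrfs_drop_dead_reloc_roots().
 */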
4269 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
4270                           struct btrfs_root *root)
4271 {
4272         struct btrfs_root *reloc_root;
4273         int ret;
4274
4275         if (root->reloc_root) {
4276                 reloc_root = root->reloc_root;
4277                 root->reloc_root = NULL;
4278                 list_add(&reloc_root->dead_list,
4279                          &root->fs_info->dead_reloc_roots);
4280
4281                 btrfs_set_root_bytenr(&reloc_root->root_item,
4282                                       reloc_root->node->start);
4283                 btrfs_set_root_level(&reloc_root->root_item,
4284                                      btrfs_header_level(reloc_root->node));
4285                 memset(&reloc_root->root_item.drop_progress, 0,
4286                         sizeof(struct btrfs_disk_key));
4287                 reloc_root->root_item.drop_level = 0;
4288
4289                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4290                                         &reloc_root->root_key,
4291                                         &reloc_root->root_item);
4292                 BUG_ON(ret);
4293         }
4294         return 0;
4295 }
4296
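/*
 * Drop every reloc tree on the dead_reloc_roots list.  btrfs_drop_snapshot()
 * is called in a loop, ending and restarting the transaction whenever it
 * returns -EAGAIN, and the reloc root item is finally deleted from the
 * tree root.
 */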
4297 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
4298 {
4299         struct btrfs_trans_handle *trans;
4300         struct btrfs_root *reloc_root;
4301         struct btrfs_root *prev_root = NULL;
4302         struct list_head dead_roots;
4303         int ret;
4304         unsigned long nr;
4305
4306         INIT_LIST_HEAD(&dead_roots);
4307         list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
4308
4309         while (!list_empty(&dead_roots)) {
4310                 reloc_root = list_entry(dead_roots.prev,
4311                                         struct btrfs_root, dead_list);
4312                 list_del_init(&reloc_root->dead_list);
4313
4314                 BUG_ON(reloc_root->commit_root != NULL);
4315                 while (1) {
4316                         trans = btrfs_join_transaction(root, 1);
4317                         BUG_ON(!trans);
4318
4319                         mutex_lock(&root->fs_info->drop_mutex);
4320                         ret = btrfs_drop_snapshot(trans, reloc_root);
4321                         if (ret != -EAGAIN)
4322                                 break;
4323                         mutex_unlock(&root->fs_info->drop_mutex);
4324
4325                         nr = trans->blocks_used;
4326                         ret = btrfs_end_transaction(trans, root);
4327                         BUG_ON(ret);
4328                         btrfs_btree_balance_dirty(root, nr);
4329                 }
4330
4331                 free_extent_buffer(reloc_root->node);
4332
4333                 ret = btrfs_del_root(trans, root->fs_info->tree_root,
4334                                      &reloc_root->root_key);
4335                 BUG_ON(ret);
4336                 mutex_unlock(&root->fs_info->drop_mutex);
4337
4338                 nr = trans->blocks_used;
4339                 ret = btrfs_end_transaction(trans, root);
4340                 BUG_ON(ret);
4341                 btrfs_btree_balance_dirty(root, nr);
4342
4343                 kfree(prev_root);
4344                 prev_root = reloc_root;
4345         }
4346         if (prev_root) {
4347                 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
4348                 kfree(prev_root);
4349         }
4350         return 0;
4351 }
4352
4353 int btrfs_add_dead_reloc_root(struct btrfs_root *root)
4354 {
4355         list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
4356         return 0;
4357 }
4358
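/*
 * Look for dead reloc roots recorded in the tree root (e.g. left over after
 * a crash).  If any are found, commit a transaction so they get dropped,
 * then run orphan cleanup on the data relocation tree.
 */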
4359 int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
4360 {
4361         struct btrfs_root *reloc_root;
4362         struct btrfs_trans_handle *trans;
4363         struct btrfs_key location;
4364         int found;
4365         int ret;
4366
4367         mutex_lock(&root->fs_info->tree_reloc_mutex);
4368         ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
4369         BUG_ON(ret);
4370         found = !list_empty(&root->fs_info->dead_reloc_roots);
4371         mutex_unlock(&root->fs_info->tree_reloc_mutex);
4372
4373         if (found) {
4374                 trans = btrfs_start_transaction(root, 1);
4375                 BUG_ON(!trans);
4376                 ret = btrfs_commit_transaction(trans, root);
4377                 BUG_ON(ret);
4378         }
4379
4380         location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
4381         location.offset = (u64)-1;
4382         location.type = BTRFS_ROOT_ITEM_KEY;
4383
4384         reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
4385         BUG_ON(!reloc_root);
4386         btrfs_orphan_cleanup(reloc_root);
4387         return 0;
4388 }
4389
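/*
 * Create the reloc tree for a subvol if it does not exist yet: snapshot the
 * subvol's commit root under BTRFS_TREE_RELOC_OBJECTID, insert the new root
 * item into the tree root and hook the result up as root->reloc_root.
 */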
4390 static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
4391                                     struct btrfs_root *root)
4392 {
4393         struct btrfs_root *reloc_root;
4394         struct extent_buffer *eb;
4395         struct btrfs_root_item *root_item;
4396         struct btrfs_key root_key;
4397         int ret;
4398
4399         BUG_ON(!root->ref_cows);
4400         if (root->reloc_root)
4401                 return 0;
4402
4403         root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
4404         BUG_ON(!root_item);
4405
4406         ret = btrfs_copy_root(trans, root, root->commit_root,
4407                               &eb, BTRFS_TREE_RELOC_OBJECTID);
4408         BUG_ON(ret);
4409
4410         root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4411         root_key.offset = root->root_key.objectid;
4412         root_key.type = BTRFS_ROOT_ITEM_KEY;
4413
4414         memcpy(root_item, &root->root_item, sizeof(*root_item));
4415         btrfs_set_root_refs(root_item, 0);
4416         btrfs_set_root_bytenr(root_item, eb->start);
4417         btrfs_set_root_level(root_item, btrfs_header_level(eb));
4418
4419         btrfs_tree_unlock(eb);
4420         free_extent_buffer(eb);
4421
4422         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
4423                                 &root_key, root_item);
4424         BUG_ON(ret);
4425         kfree(root_item);
4426
4427         reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
4428                                                  &root_key);
4429         BUG_ON(!reloc_root);
4430         reloc_root->last_trans = trans->transid;
4431         reloc_root->commit_root = NULL;
4432         reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
4433
4434         root->reloc_root = reloc_root;
4435         return 0;
4436 }
4437
4438 /*
4439  * Core function of space balance.
4440  *
4441  * The idea is to use reloc trees to relocate tree blocks in reference
4442  * counted roots. There is one reloc tree for each subvol, and all
4443  * reloc trees share the same root key objectid. Reloc trees are
4444  * snapshots of the latest committed roots of subvols (root->commit_root).
4445  *
4446  * Relocating a tree block referenced by a subvol takes two steps:
4447  * COW the block through the subvol's reloc tree, then update the block
4448  * pointer in the subvol to point to the new block. Since all reloc trees
4449  * share the same root key objectid, special handling for tree blocks
4450  * owned by them is easy. Once a tree block has been COWed in one reloc
4451  * tree, the resulting new block can be used directly when the same block
4452  * needs to be COWed again through another reloc tree. In this way,
4453  * relocated tree blocks are shared between reloc trees, so they are also
4454  * shared between subvols.
4455  */
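/*
 * A rough sketch of what relocate_one_path() below does for a reference
 * counted root (locking, key bookkeeping and error handling omitted):
 *
 *	init_reloc_tree(trans, root);
 *	reloc_root = root->reloc_root;
 *	... COW the path in reloc_root via btrfs_search_slot(cow = 1),
 *	    recording the new block pointers in nodes[] and keys[] ...
 *	btrfs_merge_path(trans, root, keys, nodes, lowest_level);
 */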
4456 static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
4457                                       struct btrfs_root *root,
4458                                       struct btrfs_path *path,
4459                                       struct btrfs_key *first_key,
4460                                       struct btrfs_ref_path *ref_path,
4461                                       struct btrfs_block_group_cache *group,
4462                                       struct inode *reloc_inode)
4463 {
4464         struct btrfs_root *reloc_root;
4465         struct extent_buffer *eb = NULL;
4466         struct btrfs_key *keys;
4467         u64 *nodes;
4468         int level;
4469         int shared_level;
4470         int lowest_level = 0;
4471         int ret;
4472
4473         if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
4474                 lowest_level = ref_path->owner_objectid;
4475
4476         if (!root->ref_cows) {
4477                 path->lowest_level = lowest_level;
4478                 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
4479                 BUG_ON(ret < 0);
4480                 path->lowest_level = 0;
4481                 btrfs_release_path(root, path);
4482                 return 0;
4483         }
4484
4485         mutex_lock(&root->fs_info->tree_reloc_mutex);
4486         ret = init_reloc_tree(trans, root);
4487         BUG_ON(ret);
4488         reloc_root = root->reloc_root;
4489
4490         shared_level = ref_path->shared_level;
4491         ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
4492
4493         keys = ref_path->node_keys;
4494         nodes = ref_path->new_nodes;
4495         memset(&keys[shared_level + 1], 0,
4496                sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
4497         memset(&nodes[shared_level + 1], 0,
4498                sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
4499
4500         if (nodes[lowest_level] == 0) {
4501                 path->lowest_level = lowest_level;
4502                 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
4503                                         0, 1);
4504                 BUG_ON(ret);
4505                 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
4506                         eb = path->nodes[level];
4507                         if (!eb || eb == reloc_root->node)
4508                                 break;
4509                         nodes[level] = eb->start;
4510                         if (level == 0)
4511                                 btrfs_item_key_to_cpu(eb, &keys[level], 0);
4512                         else
4513                                 btrfs_node_key_to_cpu(eb, &keys[level], 0);
4514                 }
4515                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4516                         eb = path->nodes[0];
4517                         ret = replace_extents_in_leaf(trans, reloc_root, eb,
4518                                                       group, reloc_inode);
4519                         BUG_ON(ret);
4520                 }
4521                 btrfs_release_path(reloc_root, path);
4522         } else {
4523                 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
4524                                        lowest_level);
4525                 BUG_ON(ret);
4526         }
4527
4528         /*
4529          * replace tree blocks in the fs tree with tree blocks in
4530          * the reloc tree.
4531          */
4532         ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
4533         BUG_ON(ret < 0);
4534
4535         if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4536                 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
4537                                         0, 0);
4538                 BUG_ON(ret);
4539                 extent_buffer_get(path->nodes[0]);
4540                 eb = path->nodes[0];
4541                 btrfs_release_path(reloc_root, path);
4542                 ret = invalidate_extent_cache(reloc_root, eb, group, root);
4543                 BUG_ON(ret);
4544                 free_extent_buffer(eb);
4545         }
4546
4547         mutex_unlock(&root->fs_info->tree_reloc_mutex);
4548         path->lowest_level = 0;
4549         return 0;
4550 }
4551
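/*
 * Relocate one path of tree blocks for the given reference path.  The
 * allocation mutex is taken while the path is rooted at the extent, chunk
 * or dev root, and pending extent tree operations are flushed when the
 * extent root itself was modified.
 */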
4552 static int noinline relocate_tree_block(struct btrfs_trans_handle *trans,
4553                                         struct btrfs_root *root,
4554                                         struct btrfs_path *path,
4555                                         struct btrfs_key *first_key,
4556                                         struct btrfs_ref_path *ref_path)
4557 {
4558         int ret;
4559         int needs_lock = 0;
4560
4561         if (root == root->fs_info->extent_root ||
4562             root == root->fs_info->chunk_root ||
4563             root == root->fs_info->dev_root) {
4564                 needs_lock = 1;
4565                 mutex_lock(&root->fs_info->alloc_mutex);
4566         }
4567
4568         ret = relocate_one_path(trans, root, path, first_key,
4569                                 ref_path, NULL, NULL);
4570         BUG_ON(ret);
4571
4572         if (root == root->fs_info->extent_root)
4573                 btrfs_extent_post_op(trans, root);
4574         if (needs_lock)
4575                 mutex_unlock(&root->fs_info->alloc_mutex);
4576
4577         return 0;
4578 }
4579
4580 static int noinline del_extent_zero(struct btrfs_trans_handle *trans,
4581                                     struct btrfs_root *extent_root,
4582                                     struct btrfs_path *path,
4583                                     struct btrfs_key *extent_key)
4584 {
4585         int ret;
4586
4587         mutex_lock(&extent_root->fs_info->alloc_mutex);
4588         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
4589         if (ret)
4590                 goto out;
4591         ret = btrfs_del_item(trans, extent_root, path);
4592 out:
4593         btrfs_release_path(extent_root, path);
4594         mutex_unlock(&extent_root->fs_info->alloc_mutex);
4595         return ret;
4596 }
4597
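/*
 * Read the fs root a reference path starts from.  Cow-only roots are keyed
 * with offset 0, all other roots with offset (u64)-1 (their latest root
 * item).
 */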
4598 static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info,
4599                                                 struct btrfs_ref_path *ref_path)
4600 {
4601         struct btrfs_key root_key;
4602
4603         root_key.objectid = ref_path->root_objectid;
4604         root_key.type = BTRFS_ROOT_ITEM_KEY;
4605         if (is_cowonly_root(ref_path->root_objectid))
4606                 root_key.offset = 0;
4607         else
4608                 root_key.offset = (u64)-1;
4609
4610         return btrfs_read_fs_root_no_name(fs_info, &root_key);
4611 }
4612
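/*
 * Relocate all references to a single extent.  Every reference path of the
 * extent is walked; on pass 0 data extents are simply copied into the
 * reloc inode, later passes update the references through the reloc trees,
 * and from pass 2 on remaining data references fall back to having their
 * file extent items rewritten in place.
 */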
4613 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
4614                                         struct btrfs_path *path,
4615                                         struct btrfs_key *extent_key,
4616                                         struct btrfs_block_group_cache *group,
4617                                         struct inode *reloc_inode, int pass)
4618 {
4619         struct btrfs_trans_handle *trans;
4620         struct btrfs_root *found_root;
4621         struct btrfs_ref_path *ref_path = NULL;
4622         struct disk_extent *new_extents = NULL;
4623         int nr_extents = 0;
4624         int loops;
4625         int ret;
4626         int level;
4627         struct btrfs_key first_key;
4628         u64 prev_block = 0;
4629
4630         mutex_unlock(&extent_root->fs_info->alloc_mutex);
4631
4632         trans = btrfs_start_transaction(extent_root, 1);
4633         BUG_ON(!trans);
4634
4635         if (extent_key->objectid == 0) {
4636                 ret = del_extent_zero(trans, extent_root, path, extent_key);
4637                 goto out;
4638         }
4639
4640         ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
4641         if (!ref_path) {
4642                ret = -ENOMEM;
4643                goto out;
4644         }
4645
4646         for (loops = 0; ; loops++) {
4647                 if (loops == 0) {
4648                         ret = btrfs_first_ref_path(trans, extent_root, ref_path,
4649                                                    extent_key->objectid);
4650                 } else {
4651                         ret = btrfs_next_ref_path(trans, extent_root, ref_path);
4652                 }
4653                 if (ret < 0)
4654                         goto out;
4655                 if (ret > 0)
4656                         break;
4657
4658                 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
4659                     ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
4660                         continue;
4661
4662                 found_root = read_ref_root(extent_root->fs_info, ref_path);
4663                 BUG_ON(!found_root);
4664                 /*
4665                  * for reference counted trees, only process reference paths
4666                  * rooted at the latest committed root.
4667                  */
4668                 if (found_root->ref_cows &&
4669                     ref_path->root_generation != found_root->root_key.offset)
4670                         continue;
4671
4672                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4673                         if (pass == 0) {
4674                                 /*
4675                                  * copy data extents to new locations
4676                                  */
4677                                 u64 group_start = group->key.objectid;
4678                                 ret = relocate_data_extent(reloc_inode,
4679                                                            extent_key,
4680                                                            group_start);
4681                                 if (ret < 0)
4682                                         goto out;
4683                                 break;
4684                         }
4685                         level = 0;
4686                 } else {
4687                         level = ref_path->owner_objectid;
4688                 }
4689
4690                 if (prev_block != ref_path->nodes[level]) {
4691                         struct extent_buffer *eb;
4692                         u64 block_start = ref_path->nodes[level];
4693                         u64 block_size = btrfs_level_size(found_root, level);
4694
4695                         eb = read_tree_block(found_root, block_start,
4696                                              block_size, 0);
4697                         btrfs_tree_lock(eb);
4698                         BUG_ON(level != btrfs_header_level(eb));
4699
4700                         if (level == 0)
4701                                 btrfs_item_key_to_cpu(eb, &first_key, 0);
4702                         else
4703                                 btrfs_node_key_to_cpu(eb, &first_key, 0);
4704
4705                         btrfs_tree_unlock(eb);
4706                         free_extent_buffer(eb);
4707                         prev_block = block_start;
4708                 }
4709
4710                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4711                     pass >= 2) {
4712                         /*
4713                          * use fallback method to process the remaining
4714                          * references.
4715                          */
4716                         if (!new_extents) {
4717                                 u64 group_start = group->key.objectid;
4718                                 ret = get_new_locations(reloc_inode,
4719                                                         extent_key,
4720                                                         group_start, 0,
4721                                                         &new_extents,
4722                                                         &nr_extents);
4723                                 if (ret < 0)
4724                                         goto out;
4725                         }
4726                         btrfs_record_root_in_trans(found_root);
4727                         ret = replace_one_extent(trans, found_root,
4728                                                 path, extent_key,
4729                                                 &first_key, ref_path,
4730                                                 new_extents, nr_extents);
4731                         if (ret < 0)
4732                                 goto out;
4733                         continue;
4734                 }
4735
4736                 btrfs_record_root_in_trans(found_root);
4737                 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
4738                         ret = relocate_tree_block(trans, found_root, path,
4739                                                   &first_key, ref_path);
4740                 } else {
4741                         /*
4742                          * try to update data extent references while
4743                          * keeping metadata shared between snapshots.
4744                          */
4745                         ret = relocate_one_path(trans, found_root, path,
4746                                                 &first_key, ref_path,
4747                                                 group, reloc_inode);
4748                 }
4749                 if (ret < 0)
4750                         goto out;
4751         }
4752         ret = 0;
4753 out:
4754         btrfs_end_transaction(trans, extent_root);
4755         kfree(new_extents);
4756         kfree(ref_path);
4757         mutex_lock(&extent_root->fs_info->alloc_mutex);
4758         return ret;
4759 }
4760
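/*
 * Pick the allocation profile to use while a block group is being emptied:
 * with a single device, raid0 becomes plain single chunks and raid1/raid10
 * become dup; with multiple devices, dup becomes raid1 and single becomes
 * raid0, while profiles that already stripe or mirror are left alone.
 */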
4761 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
4762 {
4763         u64 num_devices;
4764         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
4765                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
4766
4767         num_devices = root->fs_info->fs_devices->num_devices;
4768         if (num_devices == 1) {
4769                 stripped |= BTRFS_BLOCK_GROUP_DUP;
4770                 stripped = flags & ~stripped;
4771
4772                 /* turn raid0 into single device chunks */
4773                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
4774                         return stripped;
4775
4776                 /* turn mirroring into duplication */
4777                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
4778                              BTRFS_BLOCK_GROUP_RAID10))
4779                         return stripped | BTRFS_BLOCK_GROUP_DUP;
4780                 return flags;
4781         } else {
4782                 /* they already had raid on here, just return */
4783                 if (flags & stripped)
4784                         return flags;
4785
4786                 stripped |= BTRFS_BLOCK_GROUP_DUP;
4787                 stripped = flags & ~stripped;
4788
4789                 /* switch duplicated blocks with raid1 */
4790                 if (flags & BTRFS_BLOCK_GROUP_DUP)
4791                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
4792
4793                 /* turn single device chunks into raid0 */
4794                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
4795         }
4796         return flags;
4797 }
4798
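/*
 * Make sure there is room to relocate into: if the block group being shrunk
 * still has bytes in use, allocate a new chunk large enough to hold them,
 * using the profile chosen by update_block_group_flags().
 */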
4799 int __alloc_chunk_for_shrink(struct btrfs_root *root,
4800                      struct btrfs_block_group_cache *shrink_block_group,
4801                      int force)
4802 {
4803         struct btrfs_trans_handle *trans;
4804         u64 new_alloc_flags;
4805         u64 calc;
4806
4807         spin_lock(&shrink_block_group->lock);
4808         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
4809                 spin_unlock(&shrink_block_group->lock);
4810                 mutex_unlock(&root->fs_info->alloc_mutex);
4811
4812                 trans = btrfs_start_transaction(root, 1);
4813                 mutex_lock(&root->fs_info->alloc_mutex);
4814                 spin_lock(&shrink_block_group->lock);
4815
4816                 new_alloc_flags = update_block_group_flags(root,
4817                                                    shrink_block_group->flags);
4818                 if (new_alloc_flags != shrink_block_group->flags) {
4819                         calc =
4820                              btrfs_block_group_used(&shrink_block_group->item);
4821                 } else {
4822                         calc = shrink_block_group->key.offset;
4823                 }
4824                 spin_unlock(&shrink_block_group->lock);
4825
4826                 do_chunk_alloc(trans, root->fs_info->extent_root,
4827                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
4828
4829                 mutex_unlock(&root->fs_info->alloc_mutex);
4830                 btrfs_end_transaction(trans, root);
4831                 mutex_lock(&root->fs_info->alloc_mutex);
4832         } else
4833                 spin_unlock(&shrink_block_group->lock);
4834         return 0;
4835 }
4836
4837 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
4838                                  struct btrfs_root *root,
4839                                  u64 objectid, u64 size)
4840 {
4841         struct btrfs_path *path;
4842         struct btrfs_inode_item *item;
4843         struct extent_buffer *leaf;
4844         int ret;
4845
4846         path = btrfs_alloc_path();
4847         if (!path)
4848                 return -ENOMEM;
4849
4850         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
4851         if (ret)
4852                 goto out;
4853
4854         leaf = path->nodes[0];
4855         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
4856         memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
4857         btrfs_set_inode_generation(leaf, item, 1);
4858         btrfs_set_inode_size(leaf, item, size);
4859         btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
4860         btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM);
4861         btrfs_mark_buffer_dirty(leaf);
4862         btrfs_release_path(root, path);
4863 out:
4864         btrfs_free_path(path);
4865         return ret;
4866 }
4867
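/*
 * Create the orphan inode in the data reloc tree that relocated data
 * extents are staged in.  The inode is added to the orphan list so it gets
 * cleaned up if relocation does not complete.
 */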
4868 static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
4869                                         struct btrfs_block_group_cache *group)
4870 {
4871         struct inode *inode = NULL;
4872         struct btrfs_trans_handle *trans;
4873         struct btrfs_root *root;
4874         struct btrfs_key root_key;
4875         u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
4876         int err = 0;
4877
4878         root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
4879         root_key.type = BTRFS_ROOT_ITEM_KEY;
4880         root_key.offset = (u64)-1;
4881         root = btrfs_read_fs_root_no_name(fs_info, &root_key);
4882         if (IS_ERR(root))
4883                 return ERR_CAST(root);
4884
4885         trans = btrfs_start_transaction(root, 1);
4886         BUG_ON(!trans);
4887
4888         err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
4889         if (err)
4890                 goto out;
4891
4892         err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
4893         BUG_ON(err);
4894
4895         err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
4896                                        group->key.offset, 0, group->key.offset,
4897                                        0, 0, 0);
4898         BUG_ON(err);
4899
4900         inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
4901         if (inode->i_state & I_NEW) {
4902                 BTRFS_I(inode)->root = root;
4903                 BTRFS_I(inode)->location.objectid = objectid;
4904                 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
4905                 BTRFS_I(inode)->location.offset = 0;
4906                 btrfs_read_locked_inode(inode);
4907                 unlock_new_inode(inode);
4908                 BUG_ON(is_bad_inode(inode));
4909         } else {
4910                 BUG_ON(1);
4911         }
4912
4913         err = btrfs_orphan_add(trans, inode);
4914 out:
4915         btrfs_end_transaction(trans, root);
4916         if (err) {
4917                 if (inode)
4918                         iput(inode);
4919                 inode = ERR_PTR(err);
4920         }
4921         return inode;
4922 }
4923
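/*
 * Relocate everything inside the block group starting at group_start: mark
 * the group read only, create the relocation inode, then repeatedly scan
 * the extent tree and call relocate_one_extent() on every extent item
 * still inside the group until a pass finds nothing left to move.
 */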
4924 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
4925 {
4926         struct btrfs_trans_handle *trans;
4927         struct btrfs_path *path;
4928         struct btrfs_fs_info *info = root->fs_info;
4929         struct extent_buffer *leaf;
4930         struct inode *reloc_inode;
4931         struct btrfs_block_group_cache *block_group;
4932         struct btrfs_key key;
4933         u64 cur_byte;
4934         u64 total_found;
4935         u32 nritems;
4936         int ret;
4937         int progress;
4938         int pass = 0;
4939
4940         root = root->fs_info->extent_root;
4941
4942         block_group = btrfs_lookup_block_group(info, group_start);
4943         BUG_ON(!block_group);
4944
4945         printk("btrfs relocating block group %llu flags %llu\n",
4946                (unsigned long long)block_group->key.objectid,
4947                (unsigned long long)block_group->flags);
4948
4949         path = btrfs_alloc_path();
4950         BUG_ON(!path);
4951
4952         reloc_inode = create_reloc_inode(info, block_group);
4953         BUG_ON(IS_ERR(reloc_inode));
4954
4955         mutex_lock(&root->fs_info->alloc_mutex);
4956
4957         __alloc_chunk_for_shrink(root, block_group, 1);
4958         block_group->ro = 1;
4959         block_group->space_info->total_bytes -= block_group->key.offset;
4960
4961         mutex_unlock(&root->fs_info->alloc_mutex);
4962
4963         btrfs_start_delalloc_inodes(info->tree_root);
4964         btrfs_wait_ordered_extents(info->tree_root, 0);
4965 again:
4966         total_found = 0;
4967         progress = 0;
4968         key.objectid = block_group->key.objectid;
4969         key.offset = 0;
4970         key.type = 0;
4971         cur_byte = key.objectid;
4972
4973         trans = btrfs_start_transaction(info->tree_root, 1);
4974         btrfs_commit_transaction(trans, info->tree_root);
4975
4976         mutex_lock(&root->fs_info->cleaner_mutex);
4977         btrfs_clean_old_snapshots(info->tree_root);
4978         btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
4979         mutex_unlock(&root->fs_info->cleaner_mutex);
4980
4981         mutex_lock(&root->fs_info->alloc_mutex);
4982
4983         while(1) {
4984                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4985                 if (ret < 0)
4986                         goto out;
4987 next:
4988                 leaf = path->nodes[0];
4989                 nritems = btrfs_header_nritems(leaf);
4990                 if (path->slots[0] >= nritems) {
4991                         ret = btrfs_next_leaf(root, path);
4992                         if (ret < 0)
4993                                 goto out;
4994                         if (ret == 1) {
4995                                 ret = 0;
4996                                 break;
4997                         }
4998                         leaf = path->nodes[0];
4999                         nritems = btrfs_header_nritems(leaf);
5000                 }
5001
5002                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5003
5004                 if (key.objectid >= block_group->key.objectid +
5005                     block_group->key.offset)
5006                         break;
5007
5008                 if (progress && need_resched()) {
5009                         btrfs_release_path(root, path);
5010                         mutex_unlock(&root->fs_info->alloc_mutex);
5011                         cond_resched();
5012                         mutex_lock(&root->fs_info->alloc_mutex);
5013                         progress = 0;
5014                         continue;
5015                 }
5016                 progress = 1;
5017
5018                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
5019                     key.objectid + key.offset <= cur_byte) {
5020                         path->slots[0]++;
5021                         goto next;
5022                 }
5023
5024                 total_found++;
5025                 cur_byte = key.objectid + key.offset;
5026                 btrfs_release_path(root, path);
5027
5028                 __alloc_chunk_for_shrink(root, block_group, 0);
5029                 ret = relocate_one_extent(root, path, &key, block_group,
5030                                           reloc_inode, pass);
5031                 BUG_ON(ret < 0);
5032
5033                 key.objectid = cur_byte;
5034                 key.type = 0;
5035                 key.offset = 0;
5036         }
5037
5038         btrfs_release_path(root, path);
5039         mutex_unlock(&root->fs_info->alloc_mutex);
5040
5041         if (pass == 0) {
5042                 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
5043                 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
5044                 WARN_ON(reloc_inode->i_mapping->nrpages);
5045         }
5046
5047         if (total_found > 0) {
5048                 printk("btrfs found %llu extents in pass %d\n",
5049                        (unsigned long long)total_found, pass);
5050                 pass++;
5051                 goto again;
5052         }
5053
5054         /* delete reloc_inode */
5055         iput(reloc_inode);
5056
5057         /* unpin extents in this range */
5058         trans = btrfs_start_transaction(info->tree_root, 1);
5059         btrfs_commit_transaction(trans, info->tree_root);
5060
5061         mutex_lock(&root->fs_info->alloc_mutex);
5062
5063         spin_lock(&block_group->lock);
5064         WARN_ON(block_group->pinned > 0);
5065         WARN_ON(block_group->reserved > 0);
5066         WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
5067         spin_unlock(&block_group->lock);
5068         ret = 0;
5069 out:
5070         mutex_unlock(&root->fs_info->alloc_mutex);
5071         btrfs_free_path(path);
5072         return ret;
5073 }
5074
5075 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
5076                            struct btrfs_key *key)
5077 {
5078         int ret = 0;
5079         struct btrfs_key found_key;
5080         struct extent_buffer *leaf;
5081         int slot;
5082
5083         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
5084         if (ret < 0)
5085                 goto out;
5086
5087         while(1) {
5088                 slot = path->slots[0];
5089                 leaf = path->nodes[0];
5090                 if (slot >= btrfs_header_nritems(leaf)) {
5091                         ret = btrfs_next_leaf(root, path);
5092                         if (ret == 0)
5093                                 continue;
5094                         if (ret < 0)
5095                                 goto out;
5096                         break;
5097                 }
5098                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5099
5100                 if (found_key.objectid >= key->objectid &&
5101                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5102                         ret = 0;
5103                         goto out;
5104                 }
5105                 path->slots[0]++;
5106         }
5107         ret = -ENOENT;
5108 out:
5109         return ret;
5110 }
5111
5112 int btrfs_free_block_groups(struct btrfs_fs_info *info)
5113 {
5114         struct btrfs_block_group_cache *block_group;
5115         struct rb_node *n;
5116
5117         mutex_lock(&info->alloc_mutex);
5118         spin_lock(&info->block_group_cache_lock);
5119         while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
5120                 block_group = rb_entry(n, struct btrfs_block_group_cache,
5121                                        cache_node);
5122
5123                 spin_unlock(&info->block_group_cache_lock);
5124                 btrfs_remove_free_space_cache(block_group);
5125                 spin_lock(&info->block_group_cache_lock);
5126
5127                 rb_erase(&block_group->cache_node,
5128                          &info->block_group_cache_tree);
5129                 down_write(&block_group->space_info->groups_sem);
5130                 list_del(&block_group->list);
5131                 up_write(&block_group->space_info->groups_sem);
5132                 kfree(block_group);
5133         }
5134         spin_unlock(&info->block_group_cache_lock);
5135         mutex_unlock(&info->alloc_mutex);
5136         return 0;
5137 }
5138
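/*
 * Walk all block group items in the extent tree and build the in-memory
 * block group cache along with the per-profile space_info accounting.
 */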
5139 int btrfs_read_block_groups(struct btrfs_root *root)
5140 {
5141         struct btrfs_path *path;
5142         int ret;
5143         struct btrfs_block_group_cache *cache;
5144         struct btrfs_fs_info *info = root->fs_info;
5145         struct btrfs_space_info *space_info;
5146         struct btrfs_key key;
5147         struct btrfs_key found_key;
5148         struct extent_buffer *leaf;
5149
5150         root = info->extent_root;
5151         key.objectid = 0;
5152         key.offset = 0;
5153         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5154         path = btrfs_alloc_path();
5155         if (!path)
5156                 return -ENOMEM;
5157
5158         mutex_lock(&root->fs_info->alloc_mutex);
5159         while(1) {
5160                 ret = find_first_block_group(root, path, &key);
5161                 if (ret > 0) {
5162                         ret = 0;
5163                         goto error;
5164                 }
5165                 if (ret != 0)
5166                         goto error;
5167
5168                 leaf = path->nodes[0];
5169                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5170                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
5171                 if (!cache) {
5172                         ret = -ENOMEM;
5173                         break;
5174                 }
5175
5176                 spin_lock_init(&cache->lock);
5177                 INIT_LIST_HEAD(&cache->list);
5178                 read_extent_buffer(leaf, &cache->item,
5179                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
5180                                    sizeof(cache->item));
5181                 memcpy(&cache->key, &found_key, sizeof(found_key));
5182
5183                 key.objectid = found_key.objectid + found_key.offset;
5184                 btrfs_release_path(root, path);
5185                 cache->flags = btrfs_block_group_flags(&cache->item);
5186
5187                 ret = update_space_info(info, cache->flags, found_key.offset,
5188                                         btrfs_block_group_used(&cache->item),
5189                                         &space_info);
5190                 BUG_ON(ret);
5191                 cache->space_info = space_info;
5192                 down_write(&space_info->groups_sem);
5193                 list_add_tail(&cache->list, &space_info->block_groups);
5194                 up_write(&space_info->groups_sem);
5195
5196                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
5197                 BUG_ON(ret);
5198
5199                 set_avail_alloc_bits(root->fs_info, cache->flags);
5200         }
5201         ret = 0;
5202 error:
5203         btrfs_free_path(path);
5204         mutex_unlock(&root->fs_info->alloc_mutex);
5205         return ret;
5206 }
5207
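/*
 * Create a new block group for a freshly allocated chunk: set up the
 * in-memory cache entry, account it in the matching space_info and insert
 * the block group item into the extent tree.
 */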
5208 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
5209                            struct btrfs_root *root, u64 bytes_used,
5210                            u64 type, u64 chunk_objectid, u64 chunk_offset,
5211                            u64 size)
5212 {
5213         int ret;
5214         struct btrfs_root *extent_root;
5215         struct btrfs_block_group_cache *cache;
5216
5217         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
5218         extent_root = root->fs_info->extent_root;
5219
5220         root->fs_info->last_trans_new_blockgroup = trans->transid;
5221
5222         cache = kzalloc(sizeof(*cache), GFP_NOFS);
5223         if (!cache)
5224                 return -ENOMEM;
5225
5226         cache->key.objectid = chunk_offset;
5227         cache->key.offset = size;
5228         spin_lock_init(&cache->lock);
5229         INIT_LIST_HEAD(&cache->list);
5230         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5231
5232         btrfs_set_block_group_used(&cache->item, bytes_used);
5233         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
5234         cache->flags = type;
5235         btrfs_set_block_group_flags(&cache->item, type);
5236
5237         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
5238                                 &cache->space_info);
5239         BUG_ON(ret);
5240         down_write(&cache->space_info->groups_sem);
5241         list_add_tail(&cache->list, &cache->space_info->block_groups);
5242         up_write(&cache->space_info->groups_sem);
5243
5244         ret = btrfs_add_block_group_cache(root->fs_info, cache);
5245         BUG_ON(ret);
5246
5247         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
5248                                 sizeof(cache->item));
5249         BUG_ON(ret);
5250
5251         finish_current_insert(trans, extent_root);
5252         ret = del_pending_extents(trans, extent_root);
5253         BUG_ON(ret);
5254         set_avail_alloc_bits(extent_root->fs_info, type);
5255
5256         return 0;
5257 }
5258
5259 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5260                              struct btrfs_root *root, u64 group_start)
5261 {
5262         struct btrfs_path *path;
5263         struct btrfs_block_group_cache *block_group;
5264         struct btrfs_key key;
5265         int ret;
5266
5267         BUG_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
5268         root = root->fs_info->extent_root;
5269
5270         block_group = btrfs_lookup_block_group(root->fs_info, group_start);
5271         BUG_ON(!block_group);
5272
5273         memcpy(&key, &block_group->key, sizeof(key));
5274
5275         path = btrfs_alloc_path();
5276         BUG_ON(!path);
5277
5278         btrfs_remove_free_space_cache(block_group);
5279         rb_erase(&block_group->cache_node,
5280                  &root->fs_info->block_group_cache_tree);
5281         down_write(&block_group->space_info->groups_sem);
5282         list_del(&block_group->list);
5283         up_write(&block_group->space_info->groups_sem);
5284
5285         /*
5286         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
5287         kfree(shrink_block_group);
5288         */
5289
5290         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5291         if (ret > 0)
5292                 ret = -EIO;
5293         if (ret < 0)
5294                 goto out;
5295
5296         ret = btrfs_del_item(trans, root, path);
5297 out:
5298         btrfs_free_path(path);
5299         return ret;
5300 }