db37b867e4f168089ba59d1e801a3637dd579e0b
[safe/jmp/linux-2.6] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include "hash.h"
23 #include "crc32c.h"
24 #include "ctree.h"
25 #include "disk-io.h"
26 #include "print-tree.h"
27 #include "transaction.h"
28 #include "volumes.h"
29 #include "locking.h"
30 #include "ref-cache.h"
31
/* deferred extent-tree operation types, stored in pending_extent_op->type */
#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2

/*
 * A deferred modification to the extent tree.  These are attached to the
 * fs_info->extent_ins tree as state-private data (see
 * __btrfs_update_extent_ref) and processed later by
 * finish_current_insert()/del_pending_extents().
 */
struct pending_extent_op {
	/* one of the PENDING_* values above */
	int type;
	/* start of the extent this op applies to */
	u64 bytenr;
	/* length of the extent */
	u64 num_bytes;
	/* new parent block of the backref */
	u64 parent;
	/* parent recorded in the existing backref */
	u64 orig_parent;
	/* new generation for the backref */
	u64 generation;
	/* generation recorded in the existing backref */
	u64 orig_generation;
	/* tree level of the block (only meaningful for tree blocks) */
	int level;
};
46
47 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
48                                  btrfs_root *extent_root);
49 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
50                                btrfs_root *extent_root);
51 static struct btrfs_block_group_cache *
52 __btrfs_find_block_group(struct btrfs_root *root,
53                          struct btrfs_block_group_cache *hint,
54                          u64 search_start, int data, int owner);
55
56 void maybe_lock_mutex(struct btrfs_root *root)
57 {
58         if (root != root->fs_info->extent_root &&
59             root != root->fs_info->chunk_root &&
60             root != root->fs_info->dev_root) {
61                 mutex_lock(&root->fs_info->alloc_mutex);
62         }
63 }
64
65 void maybe_unlock_mutex(struct btrfs_root *root)
66 {
67         if (root != root->fs_info->extent_root &&
68             root != root->fs_info->chunk_root &&
69             root != root->fs_info->dev_root) {
70                 mutex_unlock(&root->fs_info->alloc_mutex);
71         }
72 }
73
74 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
75 {
76         return (cache->flags & bits) == bits;
77 }
78
/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 *
 * The tree is keyed by the block group's starting bytenr
 * (key.objectid).  Returns 0 on success, -EEXIST if a block group
 * already starts at the same bytenr.
 */
int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	/* standard rbtree descent to find the link to hang the new node on */
	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			/* duplicate start offset: refuse the insert */
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);
	spin_unlock(&info->block_group_cache_lock);

	return 0;
}
114
/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 *
 * Returns NULL when no matching block group exists.  The rbtree is keyed
 * by each group's starting bytenr; @ret tracks the best (lowest-starting)
 * candidate seen so far for the "at or after" mode.
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		/* [start, end] is the byte range this group covers */
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			/*
			 * group starts past bytenr: remember it as a
			 * candidate for "at or after", keep looking left
			 * for a closer one
			 */
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			/* bytenr falls inside this group's range */
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			/* exact match on the starting bytenr */
			ret = cache;
			break;
		}
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}
155
/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 *
 * Adds [start, end) to the block group's free space cache, skipping any
 * sub-ranges that are currently pinned (EXTENT_DIRTY in
 * info->pinned_extents).  Always returns 0; failures from
 * btrfs_add_free_space are fatal (BUG_ON).
 */
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size;
	int ret;

	while (start < end) {
		/* find the next pinned range at or after start */
		ret = find_first_extent_bit(&info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		if (extent_start == start) {
			/* pinned right at start: skip over it */
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			/* free gap before the pinned range */
			size = extent_start - start;
			ret = btrfs_add_free_space(block_group, start, size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			/* next pinned range is past our window */
			break;
		}
	}

	/* whatever remains of the window is free */
	if (start < end) {
		size = end - start;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return 0;
}
194
/*
 * Populate the block group's free space cache by walking the extent tree
 * and recording the gaps between allocated extents.  A no-op if the group
 * is NULL or already cached.  Returns 0 on success or a negative errno.
 */
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last = 0;
	u64 first_free;
	int found = 0;

	if (!block_group)
		return 0;

	/* extent items live in the extent tree regardless of @root */
	root = root->fs_info->extent_root;

	if (block_group->cached)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	/*
	 * we get into deadlocks with paths held by callers of this function.
	 * since the alloc_mutex is protecting things right now, just
	 * skip the locking here
	 */
	path->skip_locking = 1;
	/* never hand out space overlapping the superblock area */
	first_free = max_t(u64, block_group->key.objectid,
			   BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
	key.objectid = block_group->key.objectid;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;
	/*
	 * step back one item: an extent starting before the group may
	 * still overlap its beginning, which moves first_free forward
	 */
	ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
	if (ret < 0)
		goto err;
	if (ret == 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid + key.offset > first_free)
			first_free = key.objectid + key.offset;
	}
	/* walk every extent item inside the group, recording the gaps */
	while(1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			if (ret == 0)
				continue;
			else
				break;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		/* past the end of this block group: done */
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
				last = first_free;
				found = 1;
			}

			/* the gap [last, key.objectid) is free space */
			add_new_free_space(block_group, root->fs_info, last,
					   key.objectid);

			last = key.objectid + key.offset;
		}
next:
		path->slots[0]++;
	}

	if (!found)
		last = first_free;

	/* trailing gap from the last extent to the end of the group */
	add_new_free_space(block_group, root->fs_info, last,
			   block_group->key.objectid +
			   block_group->key.offset);

	block_group->cached = 1;
	ret = 0;
err:
	btrfs_free_path(path);
	return ret;
}
291
292 /*
293  * return the block group that starts at or after bytenr
294  */
295 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
296                                                        btrfs_fs_info *info,
297                                                          u64 bytenr)
298 {
299         struct btrfs_block_group_cache *cache;
300
301         cache = block_group_cache_tree_search(info, bytenr, 0);
302
303         return cache;
304 }
305
306 /*
307  * return the block group that contains teh given bytenr
308  */
309 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
310                                                          btrfs_fs_info *info,
311                                                          u64 bytenr)
312 {
313         struct btrfs_block_group_cache *cache;
314
315         cache = block_group_cache_tree_search(info, bytenr, 1);
316
317         return cache;
318 }
319
320 static int noinline find_free_space(struct btrfs_root *root,
321                                     struct btrfs_block_group_cache **cache_ret,
322                                     u64 *start_ret, u64 num, int data)
323 {
324         int ret;
325         struct btrfs_block_group_cache *cache = *cache_ret;
326         struct btrfs_free_space *info = NULL;
327         u64 last;
328         u64 search_start = *start_ret;
329
330         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
331         if (!cache)
332                 goto out;
333
334         last = max(search_start, cache->key.objectid);
335
336 again:
337         ret = cache_block_group(root, cache);
338         if (ret)
339                 goto out;
340
341         if (cache->ro || !block_group_bits(cache, data))
342                 goto new_group;
343
344         info = btrfs_find_free_space(cache, last, num);
345         if (info) {
346                 *start_ret = info->offset;
347                 return 0;
348         }
349
350 new_group:
351         last = cache->key.objectid + cache->key.offset;
352
353         cache = btrfs_lookup_first_block_group(root->fs_info, last);
354         if (!cache)
355                 goto out;
356
357         *cache_ret = cache;
358         goto again;
359
360 out:
361         return -ENOSPC;
362 }
363
364 static u64 div_factor(u64 num, int factor)
365 {
366         if (factor == 10)
367                 return num;
368         num *= factor;
369         do_div(num, 10);
370         return num;
371 }
372
373 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
374                                                   u64 flags)
375 {
376         struct list_head *head = &info->space_info;
377         struct list_head *cur;
378         struct btrfs_space_info *found;
379         list_for_each(cur, head) {
380                 found = list_entry(cur, struct btrfs_space_info, list);
381                 if (found->flags == flags)
382                         return found;
383         }
384         return NULL;
385 }
386
/*
 * Pick a block group to allocate from.  Tries, in order: the group at
 * search_start, the caller's hint, then a forward scan from `last`.
 * A group qualifies when it is writable, matches the @data type bits,
 * and its used+pinned+reserved space is below a fullness threshold
 * (9/10 for metadata, otherwise full size).  The scan wraps around once,
 * then retries with the threshold relaxed to full size.  May return NULL.
 */
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
			 struct btrfs_block_group_cache *hint,
			 u64 search_start, int data, int owner)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
	u64 last = 0;
	u64 free_check;
	int full_search = 0;
	int factor = 10;
	int wrapped = 0;

	/* leave metadata groups 10% headroom on the first pass */
	if (data & BTRFS_BLOCK_GROUP_METADATA)
		factor = 9;

	if (search_start) {
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_first_block_group(info, search_start);
		if (shint && block_group_bits(shint, data) && !shint->ro) {
			spin_lock(&shint->lock);
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned + shint->reserved <
			    div_factor(shint->key.offset, factor)) {
				spin_unlock(&shint->lock);
				return shint;
			}
			spin_unlock(&shint->lock);
		}
	}
	if (hint && !hint->ro && block_group_bits(hint, data)) {
		spin_lock(&hint->lock);
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned + hint->reserved <
		    div_factor(hint->key.offset, factor)) {
			spin_unlock(&hint->lock);
			return hint;
		}
		spin_unlock(&hint->lock);
		/* hint was full: start scanning just past it */
		last = hint->key.objectid + hint->key.offset;
	} else {
		if (hint)
			last = max(hint->key.objectid, search_start);
		else
			last = search_start;
	}
again:
	/* forward scan over all block groups starting at `last` */
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (!cache->ro && block_group_bits(cache, data)) {
			free_check = div_factor(cache->key.offset, factor);
			if (used + cache->pinned + cache->reserved <
			    free_check) {
				found_group = cache;
				spin_unlock(&cache->lock);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		cond_resched();
	}
	/* first retry: wrap around to the start of the search range */
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	/* second retry: drop the fullness headroom and scan once more */
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return found_group;
}
471
472 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
473                                                  struct btrfs_block_group_cache
474                                                  *hint, u64 search_start,
475                                                  int data, int owner)
476 {
477
478         struct btrfs_block_group_cache *ret;
479         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
480         return ret;
481 }
482
483 /* simple helper to search for an existing extent at a given offset */
484 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
485 {
486         int ret;
487         struct btrfs_key key;
488         struct btrfs_path *path;
489
490         path = btrfs_alloc_path();
491         BUG_ON(!path);
492         maybe_lock_mutex(root);
493         key.objectid = start;
494         key.offset = len;
495         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
496         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
497                                 0, 0);
498         maybe_unlock_mutex(root);
499         btrfs_free_path(path);
500         return ret;
501 }
502
503 /*
504  * Back reference rules.  Back refs have three main goals:
505  *
506  * 1) differentiate between all holders of references to an extent so that
507  *    when a reference is dropped we can make sure it was a valid reference
508  *    before freeing the extent.
509  *
510  * 2) Provide enough information to quickly find the holders of an extent
511  *    if we notice a given block is corrupted or bad.
512  *
513  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
514  *    maintenance.  This is actually the same as #2, but with a slightly
515  *    different use case.
516  *
517  * File extents can be referenced by:
518  *
519  * - multiple snapshots, subvolumes, or different generations in one subvol
520  * - different files inside a single subvolume
521  * - different offsets inside a file (bookend extents in file.c)
522  *
523  * The extent ref structure has fields for:
524  *
525  * - Objectid of the subvolume root
526  * - Generation number of the tree holding the reference
527  * - objectid of the file holding the reference
528  * - offset in the file corresponding to the key holding the reference
529  * - number of references held by the parent node (always 1 for tree blocks)
530  *
531  * Btree leaf may hold multiple references to a file extent. In most cases,
532  * these references are from same file and the corresponding offsets inside
533  * the file are close together. So inode objectid and offset in file are
534  * just hints, they provide hints about where in the btree the references
535  * can be found and when we can stop searching.
536  *
537  * When a file extent is allocated the fields are filled in:
538  *     (root_key.objectid, trans->transid, inode objectid, offset in file, 1)
539  *
540  * When a leaf is cow'd new references are added for every file extent found
541  * in the leaf.  It looks similar to the create case, but trans->transid will
542  * be different when the block is cow'd.
543  *
544  *     (root_key.objectid, trans->transid, inode objectid, offset in file,
545  *      number of references in the leaf)
546  *
547  * Because inode objectid and offset in file are just hints, they are not
548  * used when backrefs are deleted. When a file extent is removed either
549  * during snapshot deletion or file truncation, we find the corresponding
550  * back reference and check the following fields.
551  *
552  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf))
553  *
554  * Btree extents can be referenced by:
555  *
556  * - Different subvolumes
557  * - Different generations of the same subvolume
558  *
559  * When a tree block is created, back references are inserted:
560  *
561  * (root->root_key.objectid, trans->transid, level, 0, 1)
562  *
563  * When a tree block is cow'd, new back references are added for all the
564  * blocks it points to. If the tree block isn't in reference counted root,
565  * the old back references are removed. These new back references are of
566  * the form (trans->transid will have increased since creation):
567  *
568  * (root->root_key.objectid, trans->transid, level, 0, 1)
569  *
570  * When a backref is being deleted, the following fields are checked:
571  *
572  * if backref was for a tree root:
573  *     (btrfs_header_owner(itself), btrfs_header_generation(itself))
574  * else
575  *     (btrfs_header_owner(parent), btrfs_header_generation(parent))
576  *
577  * Back Reference Key composing:
578  *
579  * The key objectid corresponds to the first byte in the extent, the key
580  * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
581  * byte of the parent extent. If an extent is a tree root, the key offset
582  * is set to the key objectid.
583  */
584
585 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
586                                           struct btrfs_root *root,
587                                           struct btrfs_path *path, u64 bytenr,
588                                           u64 parent, u64 ref_root,
589                                           u64 ref_generation, int del)
590 {
591         struct btrfs_key key;
592         struct btrfs_extent_ref *ref;
593         struct extent_buffer *leaf;
594         int ret;
595
596         key.objectid = bytenr;
597         key.type = BTRFS_EXTENT_REF_KEY;
598         key.offset = parent;
599
600         ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
601         if (ret < 0)
602                 goto out;
603         if (ret > 0) {
604                 ret = -ENOENT;
605                 goto out;
606         }
607
608         leaf = path->nodes[0];
609         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
610         if (btrfs_ref_root(leaf, ref) != ref_root ||
611             btrfs_ref_generation(leaf, ref) != ref_generation) {
612                 ret = -EIO;
613                 WARN_ON(1);
614                 goto out;
615         }
616         ret = 0;
617 out:
618         return ret;
619 }
620
/*
 * Insert a backref item for (bytenr, parent).  If one already exists
 * (-EEXIST from the insert) its refcount is bumped instead, after
 * verifying the recorded root/generation match.  The owner
 * objectid/offset fields are only hints; on a merge they are collapsed
 * to BTRFS_MULTIPLE_OBJECTIDS when the owners differ.
 *
 * Returns 0 on success, -EIO on a root/generation mismatch, or a
 * negative errno from the insert.  Releases @path before returning.
 */
static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 ref_root, u64 ref_generation,
					  u64 owner_objectid, u64 owner_offset)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *ref;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
	if (ret == 0) {
		/* fresh backref: fill in all fields, refcount starts at 1 */
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		btrfs_set_ref_root(leaf, ref, ref_root);
		btrfs_set_ref_generation(leaf, ref, ref_generation);
		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
		btrfs_set_ref_offset(leaf, ref, owner_offset);
		btrfs_set_ref_num_refs(leaf, ref, 1);
	} else if (ret == -EEXIST) {
		u64 existing_owner;
		/* only file extents may share a backref item */
		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		if (btrfs_ref_root(leaf, ref) != ref_root ||
		    btrfs_ref_generation(leaf, ref) != ref_generation) {
			ret = -EIO;
			WARN_ON(1);
			goto out;
		}

		num_refs = btrfs_ref_num_refs(leaf, ref);
		BUG_ON(num_refs == 0);
		btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);

		/*
		 * keep the owner hint pointing at the lowest offset for
		 * this owner; use the MULTIPLE_OBJECTIDS sentinel when
		 * different owners share the backref
		 */
		existing_owner = btrfs_ref_objectid(leaf, ref);
		if (existing_owner == owner_objectid &&
		    btrfs_ref_offset(leaf, ref) > owner_offset) {
			btrfs_set_ref_offset(leaf, ref, owner_offset);
		} else if (existing_owner != owner_objectid &&
			   existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
			btrfs_set_ref_objectid(leaf, ref,
					BTRFS_MULTIPLE_OBJECTIDS);
			btrfs_set_ref_offset(leaf, ref, 0);
		}
		ret = 0;
	} else {
		goto out;
	}
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
}
684
685 static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
686                                           struct btrfs_root *root,
687                                           struct btrfs_path *path)
688 {
689         struct extent_buffer *leaf;
690         struct btrfs_extent_ref *ref;
691         u32 num_refs;
692         int ret = 0;
693
694         leaf = path->nodes[0];
695         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
696         num_refs = btrfs_ref_num_refs(leaf, ref);
697         BUG_ON(num_refs == 0);
698         num_refs -= 1;
699         if (num_refs == 0) {
700                 ret = btrfs_del_item(trans, root, path);
701         } else {
702                 btrfs_set_ref_num_refs(leaf, ref, num_refs);
703                 btrfs_mark_buffer_dirty(leaf);
704         }
705         btrfs_release_path(root, path);
706         return ret;
707 }
708
/*
 * Move a backref from (orig_parent, orig_root, orig_generation) to
 * (parent, ref_root, ref_generation).
 *
 * For blocks of the extent tree itself the update cannot be done
 * in place (it would recurse into the tree being modified), so it is
 * recorded as a pending op in fs_info->extent_ins instead: either by
 * patching an already-queued op for this range, or by queueing a new
 * PENDING_BACKREF_UPDATE.  For all other roots the old backref is
 * looked up, removed, and re-inserted with the new values.
 */
static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 bytenr,
				     u64 orig_parent, u64 parent,
				     u64 orig_root, u64 ref_root,
				     u64 orig_generation, u64 ref_generation,
				     u64 owner_objectid, u64 owner_offset)
{
	int ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path *path;

	if (root == root->fs_info->extent_root) {
		struct pending_extent_op *extent_op;
		u64 num_bytes;

		/* for tree blocks, owner_objectid encodes the level */
		BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
		num_bytes = btrfs_level_size(root, (int)owner_objectid);
		if (test_range_bit(&root->fs_info->extent_ins, bytenr,
				bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) {
			/* an op is already queued for this range: patch it */
			u64 priv;
			ret = get_state_private(&root->fs_info->extent_ins,
						bytenr, &priv);
			BUG_ON(ret);
			extent_op = (struct pending_extent_op *)
							(unsigned long)priv;
			BUG_ON(extent_op->parent != orig_parent);
			BUG_ON(extent_op->generation != orig_generation);
			extent_op->parent = parent;
			extent_op->generation = ref_generation;
		} else {
			/* queue a new pending backref update for this range */
			extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
			BUG_ON(!extent_op);

			extent_op->type = PENDING_BACKREF_UPDATE;
			extent_op->bytenr = bytenr;
			extent_op->num_bytes = num_bytes;
			extent_op->parent = parent;
			extent_op->orig_parent = orig_parent;
			extent_op->generation = ref_generation;
			extent_op->orig_generation = orig_generation;
			extent_op->level = (int)owner_objectid;

			set_extent_bits(&root->fs_info->extent_ins,
					bytenr, bytenr + num_bytes - 1,
					EXTENT_LOCKED, GFP_NOFS);
			set_state_private(&root->fs_info->extent_ins,
					  bytenr, (unsigned long)extent_op);
		}
		return 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	/* find the old backref, verifying root/generation match */
	ret = lookup_extent_backref(trans, extent_root, path,
				    bytenr, orig_parent, orig_root,
				    orig_generation, 1);
	if (ret)
		goto out;
	ret = remove_extent_backref(trans, extent_root, path);
	if (ret)
		goto out;
	ret = insert_extent_backref(trans, extent_root, path, bytenr,
				    parent, ref_root, ref_generation,
				    owner_objectid, owner_offset);
	BUG_ON(ret);
	/* flush any pending extent-tree ops our changes generated */
	finish_current_insert(trans, extent_root);
	del_pending_extents(trans, extent_root);
out:
	btrfs_free_path(path);
	return ret;
}
781
782 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
783                             struct btrfs_root *root, u64 bytenr,
784                             u64 orig_parent, u64 parent,
785                             u64 ref_root, u64 ref_generation,
786                             u64 owner_objectid, u64 owner_offset)
787 {
788         int ret;
789         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
790             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
791                 return 0;
792         maybe_lock_mutex(root);
793         ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
794                                         parent, ref_root, ref_root,
795                                         ref_generation, ref_generation,
796                                         owner_objectid, owner_offset);
797         maybe_unlock_mutex(root);
798         return ret;
799 }
800
801 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
802                                   struct btrfs_root *root, u64 bytenr,
803                                   u64 orig_parent, u64 parent,
804                                   u64 orig_root, u64 ref_root,
805                                   u64 orig_generation, u64 ref_generation,
806                                   u64 owner_objectid, u64 owner_offset)
807 {
808         struct btrfs_path *path;
809         int ret;
810         struct btrfs_key key;
811         struct extent_buffer *l;
812         struct btrfs_extent_item *item;
813         u32 refs;
814
815         path = btrfs_alloc_path();
816         if (!path)
817                 return -ENOMEM;
818
819         path->reada = 1;
820         key.objectid = bytenr;
821         key.type = BTRFS_EXTENT_ITEM_KEY;
822         key.offset = (u64)-1;
823
824         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
825                                 0, 1);
826         if (ret < 0)
827                 return ret;
828         BUG_ON(ret == 0 || path->slots[0] == 0);
829
830         path->slots[0]--;
831         l = path->nodes[0];
832
833         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
834         BUG_ON(key.objectid != bytenr);
835         BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
836
837         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
838         refs = btrfs_extent_refs(l, item);
839         btrfs_set_extent_refs(l, item, refs + 1);
840         btrfs_mark_buffer_dirty(path->nodes[0]);
841
842         btrfs_release_path(root->fs_info->extent_root, path);
843
844         path->reada = 1;
845         ret = insert_extent_backref(trans, root->fs_info->extent_root,
846                                     path, bytenr, parent,
847                                     ref_root, ref_generation,
848                                     owner_objectid, owner_offset);
849         BUG_ON(ret);
850         finish_current_insert(trans, root->fs_info->extent_root);
851         del_pending_extents(trans, root->fs_info->extent_root);
852
853         btrfs_free_path(path);
854         return 0;
855 }
856
857 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
858                          struct btrfs_root *root,
859                          u64 bytenr, u64 num_bytes, u64 parent,
860                          u64 ref_root, u64 ref_generation,
861                          u64 owner_objectid, u64 owner_offset)
862 {
863         int ret;
864         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
865             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
866                 return 0;
867         maybe_lock_mutex(root);
868         ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
869                                      0, ref_root, 0, ref_generation,
870                                      owner_objectid, owner_offset);
871         maybe_unlock_mutex(root);
872         return ret;
873 }
874
875 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
876                          struct btrfs_root *root)
877 {
878         finish_current_insert(trans, root->fs_info->extent_root);
879         del_pending_extents(trans, root->fs_info->extent_root);
880         return 0;
881 }
882
883 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
884                             struct btrfs_root *root, u64 bytenr,
885                             u64 num_bytes, u32 *refs)
886 {
887         struct btrfs_path *path;
888         int ret;
889         struct btrfs_key key;
890         struct extent_buffer *l;
891         struct btrfs_extent_item *item;
892
893         WARN_ON(num_bytes < root->sectorsize);
894         path = btrfs_alloc_path();
895         path->reada = 1;
896         key.objectid = bytenr;
897         key.offset = num_bytes;
898         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
899         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
900                                 0, 0);
901         if (ret < 0)
902                 goto out;
903         if (ret != 0) {
904                 btrfs_print_leaf(root, path->nodes[0]);
905                 printk("failed to find block number %Lu\n", bytenr);
906                 BUG();
907         }
908         l = path->nodes[0];
909         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
910         *refs = btrfs_extent_refs(l, item);
911 out:
912         btrfs_free_path(path);
913         return 0;
914 }
915
916 static int get_reference_status(struct btrfs_root *root, u64 bytenr,
917                                 u64 parent_gen, u64 ref_objectid,
918                                 u64 *min_generation, u32 *ref_count)
919 {
920         struct btrfs_root *extent_root = root->fs_info->extent_root;
921         struct btrfs_path *path;
922         struct extent_buffer *leaf;
923         struct btrfs_extent_ref *ref_item;
924         struct btrfs_key key;
925         struct btrfs_key found_key;
926         u64 root_objectid = root->root_key.objectid;
927         u64 ref_generation;
928         u32 nritems;
929         int ret;
930
931         key.objectid = bytenr;
932         key.offset = (u64)-1;
933         key.type = BTRFS_EXTENT_ITEM_KEY;
934
935         path = btrfs_alloc_path();
936         mutex_lock(&root->fs_info->alloc_mutex);
937         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
938         if (ret < 0)
939                 goto out;
940         BUG_ON(ret == 0);
941         if (ret < 0 || path->slots[0] == 0)
942                 goto out;
943
944         path->slots[0]--;
945         leaf = path->nodes[0];
946         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
947
948         if (found_key.objectid != bytenr ||
949             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
950                 ret = 1;
951                 goto out;
952         }
953
954         *ref_count = 0;
955         *min_generation = (u64)-1;
956
957         while (1) {
958                 leaf = path->nodes[0];
959                 nritems = btrfs_header_nritems(leaf);
960                 if (path->slots[0] >= nritems) {
961                         ret = btrfs_next_leaf(extent_root, path);
962                         if (ret < 0)
963                                 goto out;
964                         if (ret == 0)
965                                 continue;
966                         break;
967                 }
968                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
969                 if (found_key.objectid != bytenr)
970                         break;
971
972                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
973                         path->slots[0]++;
974                         continue;
975                 }
976
977                 ref_item = btrfs_item_ptr(leaf, path->slots[0],
978                                           struct btrfs_extent_ref);
979                 ref_generation = btrfs_ref_generation(leaf, ref_item);
980                 /*
981                  * For (parent_gen > 0 && parent_gen > ref_generation):
982                  *
983                  * we reach here through the oldest root, therefore
984                  * all other reference from same snapshot should have
985                  * a larger generation.
986                  */
987                 if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
988                     (parent_gen > 0 && parent_gen > ref_generation) ||
989                     (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
990                      ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
991                         *ref_count = 2;
992                         break;
993                 }
994
995                 *ref_count = 1;
996                 if (*min_generation > ref_generation)
997                         *min_generation = ref_generation;
998
999                 path->slots[0]++;
1000         }
1001         ret = 0;
1002 out:
1003         mutex_unlock(&root->fs_info->alloc_mutex);
1004         btrfs_free_path(path);
1005         return ret;
1006 }
1007
/*
 * Decide whether the data extent at @bytenr, addressed by the file
 * extent @key in this root, is also referenced from another snapshot.
 *
 * Returns 0 when this root appears to hold the only reference, 1 when
 * a cross reference exists (or the question cannot be settled cheaply),
 * and a negative errno on error.
 */
int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_key *key, u64 bytenr)
{
	struct btrfs_root *old_root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *eb;
	struct btrfs_file_extent_item *item;
	u64 ref_generation;
	u64 min_generation;
	u64 extent_start;
	u32 ref_count;
	int level;
	int ret;

	BUG_ON(trans == NULL);
	BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
	/* first pass: count references to the data extent itself */
	ret = get_reference_status(root, bytenr, 0, key->objectid,
				   &min_generation, &ref_count);
	if (ret)
		return ret;

	/* more than one reference: definitely shared */
	if (ref_count != 1)
		return 1;

	/*
	 * walk the committed (pre-transaction) copy of this root;
	 * ref_generation is the generation that committed root was
	 * created in.
	 */
	old_root = root->dirty_root->root;
	ref_generation = old_root->root_key.offset;

	/* all references are created in running transaction */
	if (min_generation > ref_generation) {
		ret = 0;
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	path->skip_locking = 1;
	/* if no item found, the extent is referenced by other snapshot */
	ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
	if (ret)
		goto out;

	eb = path->nodes[0];
	item = btrfs_item_ptr(eb, path->slots[0],
			      struct btrfs_file_extent_item);
	/* the old item must still be a regular extent at the same bytenr */
	if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
	    btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
		ret = 1;
		goto out;
	}

	/*
	 * second pass: check every node on the old path (top-down),
	 * and finally the data extent itself (level == -1), against
	 * the generation of the node above it.
	 */
	for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
		if (level >= 0) {
			eb = path->nodes[level];
			if (!eb)
				continue;
			extent_start = eb->start;
		} else
			extent_start = bytenr;

		ret = get_reference_status(root, extent_start, ref_generation,
					   0, &min_generation, &ref_count);
		if (ret)
			goto out;

		if (ref_count != 1) {
			ret = 1;
			goto out;
		}
		if (level >= 0)
			ref_generation = btrfs_header_generation(eb);
	}
	ret = 0;
out:
	if (path)
		btrfs_free_path(path);
	return ret;
}
1090
/*
 * Cache the file-extent references held by leaf @buf in the root's
 * leaf-ref cache, so later freeing of the leaf can drop them without
 * re-reading the leaf.
 *
 * @nr_extents is the number of real (non-inline, non-hole) file
 * extents in the leaf, as counted by the caller.  Only leaves
 * (level 0) are cached; nodes are ignored.  Returns 0 or -ENOMEM.
 */
int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct extent_buffer *buf, u32 nr_extents)
{
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	u64 root_gen;
	u32 nritems;
	int i;
	int level;
	int ret = 0;
	int shared = 0;

	/* only COW-able roots keep a leaf-ref cache */
	if (!root->ref_cows)
		return 0;

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		shared = 0;
		root_gen = root->root_key.offset;
	} else {
		/* relocation trees share refs with the previous transid */
		shared = 1;
		root_gen = trans->transid - 1;
	}

	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);

	if (level == 0) {
		struct btrfs_leaf_ref *ref;
		struct btrfs_extent_info *info;

		ref = btrfs_alloc_leaf_ref(root, nr_extents);
		if (!ref) {
			ret = -ENOMEM;
			goto out;
		}

		ref->root_gen = root_gen;
		ref->bytenr = buf->start;
		ref->owner = btrfs_header_owner(buf);
		ref->generation = btrfs_header_generation(buf);
		ref->nritems = nr_extents;
		info = ref->extents;

		/* record each real data extent in the ref's extent array */
		for (i = 0; nr_extents > 0 && i < nritems; i++) {
			u64 disk_bytenr;
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			/* bytenr 0 marks a hole */
			if (disk_bytenr == 0)
				continue;

			info->bytenr = disk_bytenr;
			info->num_bytes =
				btrfs_file_extent_disk_num_bytes(buf, fi);
			info->objectid = key.objectid;
			info->offset = key.offset;
			info++;
		}

		ret = btrfs_add_leaf_ref(root, ref, shared);
		WARN_ON(ret);
		btrfs_free_leaf_ref(root, ref);
	}
out:
	return ret;
}
1163
/*
 * After COWing @orig_buf into @buf, add or update the extent
 * references for everything @buf points at: data extents in a leaf,
 * child blocks in a node.
 *
 * For ref_cows roots, new references are added
 * (__btrfs_inc_extent_ref); otherwise existing backrefs are rewritten
 * in place (__btrfs_update_extent_ref).  On success, *nr_extents (if
 * non-NULL) receives the number of file extents processed for a leaf,
 * or the number of items for a node.
 */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *orig_buf, struct extent_buffer *buf,
		  u32 *nr_extents)
{
	u64 bytenr;
	u64 ref_root;
	u64 orig_root;
	u64 ref_generation;
	u64 orig_generation;
	u32 nritems;
	u32 nr_file_extents = 0;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret = 0;
	/* NOTE(review): faili records the failing slot but is never
	 * reported anywhere — write-only */
	int faili = 0;
	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
			    u64, u64, u64, u64, u64, u64, u64, u64, u64);

	ref_root = btrfs_header_owner(buf);
	ref_generation = btrfs_header_generation(buf);
	orig_root = btrfs_header_owner(orig_buf);
	orig_generation = btrfs_header_generation(orig_buf);

	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	if (root->ref_cows) {
		process_func = __btrfs_inc_extent_ref;
	} else {
		/*
		 * non-COW roots: the tree log only tracks leaves, other
		 * such roots only track nodes — skip the rest.
		 */
		if (level == 0 &&
		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
			goto out;
		if (level != 0 &&
		    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
			goto out;
		process_func = __btrfs_update_extent_ref;
	}

	for (i = 0; i < nritems; i++) {
		cond_resched();
		if (level == 0) {
			/* leaf: reference every real data extent */
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			/* bytenr 0 marks a hole */
			if (bytenr == 0)
				continue;

			nr_file_extents++;

			maybe_lock_mutex(root);
			ret = process_func(trans, root, bytenr,
					   orig_buf->start, buf->start,
					   orig_root, ref_root,
					   orig_generation, ref_generation,
					   key.objectid, key.offset);
			maybe_unlock_mutex(root);

			if (ret) {
				faili = i;
				WARN_ON(1);
				goto fail;
			}
		} else {
			/* node: reference every child block pointer */
			bytenr = btrfs_node_blockptr(buf, i);
			maybe_lock_mutex(root);
			ret = process_func(trans, root, bytenr,
					   orig_buf->start, buf->start,
					   orig_root, ref_root,
					   orig_generation, ref_generation,
					   level - 1, 0);
			maybe_unlock_mutex(root);
			if (ret) {
				faili = i;
				WARN_ON(1);
				goto fail;
			}
		}
	}
out:
	if (nr_extents) {
		if (level == 0)
			*nr_extents = nr_file_extents;
		else
			*nr_extents = nritems;
	}
	return 0;
fail:
	WARN_ON(1);
	return ret;
}
1262
/*
 * Rewrite the backrefs for @nr items of @buf starting at @start_slot,
 * after items were moved from @orig_buf into @buf (e.g. a leaf/node
 * split): each referenced extent's backref parent is changed from
 * orig_buf->start to buf->start.
 *
 * Returns 0 on success, -1 on failure (with a WARN).
 */
int btrfs_update_ref(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root, struct extent_buffer *orig_buf,
		     struct extent_buffer *buf, int start_slot, int nr)

{
	u64 bytenr;
	u64 ref_root;
	u64 orig_root;
	u64 ref_generation;
	u64 orig_generation;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int ret;
	int slot;
	int level;

	BUG_ON(start_slot < 0);
	BUG_ON(start_slot + nr > btrfs_header_nritems(buf));

	ref_root = btrfs_header_owner(buf);
	ref_generation = btrfs_header_generation(buf);
	orig_root = btrfs_header_owner(orig_buf);
	orig_generation = btrfs_header_generation(orig_buf);
	level = btrfs_header_level(buf);

	/*
	 * non-COW roots: the tree log only tracks leaves, other such
	 * roots only track nodes (same filter as btrfs_inc_ref).
	 */
	if (!root->ref_cows) {
		if (level == 0 &&
		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
			return 0;
		if (level != 0 &&
		    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
			return 0;
	}

	for (i = 0, slot = start_slot; i < nr; i++, slot++) {
		cond_resched();
		if (level == 0) {
			/* leaf: update refs for real data extents only */
			btrfs_item_key_to_cpu(buf, &key, slot);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, slot,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			/* bytenr 0 marks a hole */
			if (bytenr == 0)
				continue;
			maybe_lock_mutex(root);
			ret = __btrfs_update_extent_ref(trans, root, bytenr,
					    orig_buf->start, buf->start,
					    orig_root, ref_root,
					    orig_generation, ref_generation,
					    key.objectid, key.offset);
			maybe_unlock_mutex(root);
			if (ret)
				goto fail;
		} else {
			/* node: update refs for the child block pointers */
			bytenr = btrfs_node_blockptr(buf, slot);
			maybe_lock_mutex(root);
			ret = __btrfs_update_extent_ref(trans, root, bytenr,
					    orig_buf->start, buf->start,
					    orig_root, ref_root,
					    orig_generation, ref_generation,
					    level - 1, 0);
			maybe_unlock_mutex(root);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	WARN_ON(1);
	return -1;
}
1339
/*
 * Write the in-memory block group item for @cache back into the
 * extent tree.
 *
 * @path is caller-owned scratch; it is released (not freed) on the
 * success path.  Pending extent inserts/deletes are flushed even on
 * failure, since the search itself may have queued some.  Returns 0,
 * the search error, or the pending-extent error.
 */
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	unsigned long bi;
	struct extent_buffer *leaf;

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	if (ret < 0)
		goto fail;
	/* the block group item must exist; ret > 0 means it is missing */
	BUG_ON(ret);

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(extent_root, path);
fail:
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}
1371
/*
 * Write every dirty block group item back to the extent tree.
 *
 * Repeatedly scans the block-group rbtree for a dirty entry (the
 * write itself can dirty other groups, hence the rescan) and writes
 * it out.  Returns the last write error seen, or 0.
 */
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache, *entry;
	struct rb_node *n;
	int err = 0;
	int werr = 0;
	struct btrfs_path *path;
	/* NOTE(review): last is accumulated below but never read */
	u64 last = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	mutex_lock(&root->fs_info->alloc_mutex);
	while(1) {
		cache = NULL;
		/* find the first dirty group under the cache lock */
		spin_lock(&root->fs_info->block_group_cache_lock);
		for (n = rb_first(&root->fs_info->block_group_cache_tree);
		     n; n = rb_next(n)) {
			entry = rb_entry(n, struct btrfs_block_group_cache,
					 cache_node);
			if (entry->dirty) {
				cache = entry;
				break;
			}
		}
		spin_unlock(&root->fs_info->block_group_cache_lock);

		if (!cache)
			break;

		cache->dirty = 0;
		last += cache->key.offset;

		err = write_one_cache_group(trans, root,
					    path, cache);
		/*
		 * if we fail to write the cache group, we want
		 * to keep it marked dirty in hopes that a later
		 * write will work
		 */
		/* NOTE(review): the comment above does not match the
		 * code — dirty was already cleared and is never re-set
		 * on failure, so a failed group is not retried.
		 * Confirm intended behavior before changing. */
		if (err) {
			werr = err;
			continue;
		}
	}
	btrfs_free_path(path);
	mutex_unlock(&root->fs_info->alloc_mutex);
	return werr;
}
1423
1424 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1425                              u64 total_bytes, u64 bytes_used,
1426                              struct btrfs_space_info **space_info)
1427 {
1428         struct btrfs_space_info *found;
1429
1430         found = __find_space_info(info, flags);
1431         if (found) {
1432                 found->total_bytes += total_bytes;
1433                 found->bytes_used += bytes_used;
1434                 found->full = 0;
1435                 *space_info = found;
1436                 return 0;
1437         }
1438         found = kmalloc(sizeof(*found), GFP_NOFS);
1439         if (!found)
1440                 return -ENOMEM;
1441
1442         list_add(&found->list, &info->space_info);
1443         INIT_LIST_HEAD(&found->block_groups);
1444         spin_lock_init(&found->lock);
1445         found->flags = flags;
1446         found->total_bytes = total_bytes;
1447         found->bytes_used = bytes_used;
1448         found->bytes_pinned = 0;
1449         found->bytes_reserved = 0;
1450         found->full = 0;
1451         found->force_alloc = 0;
1452         *space_info = found;
1453         return 0;
1454 }
1455
1456 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1457 {
1458         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1459                                    BTRFS_BLOCK_GROUP_RAID1 |
1460                                    BTRFS_BLOCK_GROUP_RAID10 |
1461                                    BTRFS_BLOCK_GROUP_DUP);
1462         if (extra_flags) {
1463                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1464                         fs_info->avail_data_alloc_bits |= extra_flags;
1465                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1466                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1467                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1468                         fs_info->avail_system_alloc_bits |= extra_flags;
1469         }
1470 }
1471
1472 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1473 {
1474         u64 num_devices = root->fs_info->fs_devices->num_devices;
1475
1476         if (num_devices == 1)
1477                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1478         if (num_devices < 4)
1479                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1480
1481         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1482             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1483                       BTRFS_BLOCK_GROUP_RAID10))) {
1484                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1485         }
1486
1487         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1488             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1489                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1490         }
1491
1492         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1493             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1494              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1495              (flags & BTRFS_BLOCK_GROUP_DUP)))
1496                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1497         return flags;
1498 }
1499
/*
 * Allocate a new chunk (and its block group) for @flags if the space
 * info is near full or @force is set.
 *
 * Called with alloc_mutex held.  Because chunk allocation needs
 * chunk_mutex, a trylock loop is used: non-forced callers simply give
 * up, forced callers drop alloc_mutex and spin until chunk_mutex is
 * available (see in-line comments).  Returns 0, or the error from
 * btrfs_alloc_chunk other than ENOSPC (ENOSPC just marks the space
 * info full).
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force)
{
	struct btrfs_space_info *space_info;
	u64 thresh;
	u64 start;
	u64 num_bytes;
	int ret = 0, waited = 0;

	flags = reduce_alloc_profile(extent_root, flags);

	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
		ret = update_space_info(extent_root->fs_info, flags,
					0, 0, &space_info);
		BUG_ON(ret);
	}
	BUG_ON(!space_info);

	/* a pending force_alloc request upgrades this call to forced */
	if (space_info->force_alloc) {
		force = 1;
		space_info->force_alloc = 0;
	}
	if (space_info->full)
		goto out;

	/* only allocate when usage crosses ~60% of the total, unless forced */
	thresh = div_factor(space_info->total_bytes, 6);
	if (!force &&
	   (space_info->bytes_used + space_info->bytes_pinned +
	    space_info->bytes_reserved + alloc_bytes) < thresh)
		goto out;

	/*
	 * chunk_mutex must be taken without blocking while alloc_mutex
	 * is held; forced callers drop alloc_mutex and retry.
	 */
	while (!mutex_trylock(&extent_root->fs_info->chunk_mutex)) {
		if (!force)
			goto out;
		mutex_unlock(&extent_root->fs_info->alloc_mutex);
		cond_resched();
		mutex_lock(&extent_root->fs_info->alloc_mutex);
		waited = 1;
	}

	/* someone may have filled the space info while we were waiting */
	if (waited && space_info->full)
		goto out_unlock;

	ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
	if (ret == -ENOSPC) {
printk("space info full %Lu\n", flags);
		space_info->full = 1;
		goto out_unlock;
	}
	BUG_ON(ret);

	ret = btrfs_make_block_group(trans, extent_root, 0, flags,
		     BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
	BUG_ON(ret);

out_unlock:
	mutex_unlock(&extent_root->fs_info->chunk_mutex);
out:
	return ret;
}
1562
/*
 * Adjust the used-byte accounting of the block group(s) covering
 * [@bytenr, @bytenr + @num_bytes).
 *
 * @alloc:     non-zero to add the bytes to used, zero to subtract
 * @mark_free: when freeing, also return the range to the block
 *             group's free-space cache
 *
 * The range may span multiple block groups; each iteration handles
 * the portion inside one group.  Caller must hold alloc_mutex.
 * Returns 0, or -1 if a covering block group is missing or the
 * free-space insert fails.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;

	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
	while(total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache) {
			return -1;
		}
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		spin_lock(&cache->lock);
		cache->dirty = 1;
		old_val = btrfs_block_group_used(&cache->item);
		/* clamp to the part of the range inside this group */
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			cache->space_info->bytes_used += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			spin_unlock(&cache->lock);
		} else {
			old_val -= num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			spin_unlock(&cache->lock);
			if (mark_free) {
				int ret;
				/* hand the freed range back to the
				 * free-space cache (outside the spinlock) */
				ret = btrfs_add_free_space(cache, bytenr,
							   num_bytes);
				if (ret)
					return -1;
			}
		}
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
1610
1611 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1612 {
1613         struct btrfs_block_group_cache *cache;
1614
1615         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
1616         if (!cache)
1617                 return 0;
1618
1619         return cache->key.objectid;
1620 }
1621
1622 int btrfs_update_pinned_extents(struct btrfs_root *root,
1623                                 u64 bytenr, u64 num, int pin)
1624 {
1625         u64 len;
1626         struct btrfs_block_group_cache *cache;
1627         struct btrfs_fs_info *fs_info = root->fs_info;
1628
1629         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1630         if (pin) {
1631                 set_extent_dirty(&fs_info->pinned_extents,
1632                                 bytenr, bytenr + num - 1, GFP_NOFS);
1633         } else {
1634                 clear_extent_dirty(&fs_info->pinned_extents,
1635                                 bytenr, bytenr + num - 1, GFP_NOFS);
1636         }
1637         while (num > 0) {
1638                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1639                 BUG_ON(!cache);
1640                 len = min(num, cache->key.offset -
1641                           (bytenr - cache->key.objectid));
1642                 if (pin) {
1643                         spin_lock(&cache->lock);
1644                         cache->pinned += len;
1645                         cache->space_info->bytes_pinned += len;
1646                         spin_unlock(&cache->lock);
1647                         fs_info->total_pinned += len;
1648                 } else {
1649                         spin_lock(&cache->lock);
1650                         cache->pinned -= len;
1651                         cache->space_info->bytes_pinned -= len;
1652                         spin_unlock(&cache->lock);
1653                         fs_info->total_pinned -= len;
1654                 }
1655                 bytenr += len;
1656                 num -= len;
1657         }
1658         return 0;
1659 }
1660
1661 static int update_reserved_extents(struct btrfs_root *root,
1662                                    u64 bytenr, u64 num, int reserve)
1663 {
1664         u64 len;
1665         struct btrfs_block_group_cache *cache;
1666         struct btrfs_fs_info *fs_info = root->fs_info;
1667
1668         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1669         while (num > 0) {
1670                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1671                 BUG_ON(!cache);
1672                 len = min(num, cache->key.offset -
1673                           (bytenr - cache->key.objectid));
1674                 if (reserve) {
1675                         spin_lock(&cache->lock);
1676                         cache->reserved += len;
1677                         cache->space_info->bytes_reserved += len;
1678                         spin_unlock(&cache->lock);
1679                 } else {
1680                         spin_lock(&cache->lock);
1681                         cache->reserved -= len;
1682                         cache->space_info->bytes_reserved -= len;
1683                         spin_unlock(&cache->lock);
1684                 }
1685                 bytenr += len;
1686                 num -= len;
1687         }
1688         return 0;
1689 }
1690
1691 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1692 {
1693         u64 last = 0;
1694         u64 start;
1695         u64 end;
1696         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1697         int ret;
1698
1699         while(1) {
1700                 ret = find_first_extent_bit(pinned_extents, last,
1701                                             &start, &end, EXTENT_DIRTY);
1702                 if (ret)
1703                         break;
1704                 set_extent_dirty(copy, start, end, GFP_NOFS);
1705                 last = end + 1;
1706         }
1707         return 0;
1708 }
1709
1710 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1711                                struct btrfs_root *root,
1712                                struct extent_io_tree *unpin)
1713 {
1714         u64 start;
1715         u64 end;
1716         int ret;
1717         struct btrfs_block_group_cache *cache;
1718
1719         mutex_lock(&root->fs_info->alloc_mutex);
1720         while(1) {
1721                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1722                                             EXTENT_DIRTY);
1723                 if (ret)
1724                         break;
1725                 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
1726                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1727                 cache = btrfs_lookup_block_group(root->fs_info, start);
1728                 if (cache->cached)
1729                         btrfs_add_free_space(cache, start, end - start + 1);
1730                 if (need_resched()) {
1731                         mutex_unlock(&root->fs_info->alloc_mutex);
1732                         cond_resched();
1733                         mutex_lock(&root->fs_info->alloc_mutex);
1734                 }
1735         }
1736         mutex_unlock(&root->fs_info->alloc_mutex);
1737         return 0;
1738 }
1739
1740 static int finish_current_insert(struct btrfs_trans_handle *trans,
1741                                  struct btrfs_root *extent_root)
1742 {
1743         u64 start;
1744         u64 end;
1745         u64 priv;
1746         struct btrfs_fs_info *info = extent_root->fs_info;
1747         struct btrfs_path *path;
1748         struct btrfs_extent_ref *ref;
1749         struct pending_extent_op *extent_op;
1750         struct btrfs_key key;
1751         struct btrfs_extent_item extent_item;
1752         int ret;
1753         int err = 0;
1754
1755         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1756         btrfs_set_stack_extent_refs(&extent_item, 1);
1757         path = btrfs_alloc_path();
1758
1759         while(1) {
1760                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1761                                             &end, EXTENT_LOCKED);
1762                 if (ret)
1763                         break;
1764
1765                 ret = get_state_private(&info->extent_ins, start, &priv);
1766                 BUG_ON(ret);
1767                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
1768
1769                 if (extent_op->type == PENDING_EXTENT_INSERT) {
1770                         key.objectid = start;
1771                         key.offset = end + 1 - start;
1772                         key.type = BTRFS_EXTENT_ITEM_KEY;
1773                         err = btrfs_insert_item(trans, extent_root, &key,
1774                                         &extent_item, sizeof(extent_item));
1775                         BUG_ON(err);
1776
1777                         clear_extent_bits(&info->extent_ins, start, end,
1778                                           EXTENT_LOCKED, GFP_NOFS);
1779
1780                         err = insert_extent_backref(trans, extent_root, path,
1781                                                 start, extent_op->parent,
1782                                                 extent_root->root_key.objectid,
1783                                                 extent_op->generation,
1784                                                 extent_op->level, 0);
1785                         BUG_ON(err);
1786                 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
1787                         err = lookup_extent_backref(trans, extent_root, path,
1788                                                 start, extent_op->orig_parent,
1789                                                 extent_root->root_key.objectid,
1790                                                 extent_op->orig_generation, 0);
1791                         BUG_ON(err);
1792
1793                         clear_extent_bits(&info->extent_ins, start, end,
1794                                           EXTENT_LOCKED, GFP_NOFS);
1795
1796                         key.objectid = start;
1797                         key.offset = extent_op->parent;
1798                         key.type = BTRFS_EXTENT_REF_KEY;
1799                         err = btrfs_set_item_key_safe(trans, extent_root, path,
1800                                                       &key);
1801                         BUG_ON(err);
1802                         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1803                                              struct btrfs_extent_ref);
1804                         btrfs_set_ref_generation(path->nodes[0], ref,
1805                                                  extent_op->generation);
1806                         btrfs_mark_buffer_dirty(path->nodes[0]);
1807                         btrfs_release_path(extent_root, path);
1808                 } else {
1809                         BUG_ON(1);
1810                 }
1811                 kfree(extent_op);
1812
1813                 if (need_resched()) {
1814                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
1815                         cond_resched();
1816                         mutex_lock(&extent_root->fs_info->alloc_mutex);
1817                 }
1818         }
1819         btrfs_free_path(path);
1820         return 0;
1821 }
1822
1823 static int pin_down_bytes(struct btrfs_trans_handle *trans,
1824                           struct btrfs_root *root,
1825                           u64 bytenr, u64 num_bytes, int is_data)
1826 {
1827         int err = 0;
1828         struct extent_buffer *buf;
1829
1830         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1831         if (is_data)
1832                 goto pinit;
1833
1834         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1835         if (!buf)
1836                 goto pinit;
1837
1838         /* we can reuse a block if it hasn't been written
1839          * and it is from this transaction.  We can't
1840          * reuse anything from the tree log root because
1841          * it has tiny sub-transactions.
1842          */
1843         if (btrfs_buffer_uptodate(buf, 0) &&
1844             btrfs_try_tree_lock(buf)) {
1845                 u64 header_owner = btrfs_header_owner(buf);
1846                 u64 header_transid = btrfs_header_generation(buf);
1847                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
1848                     header_owner != BTRFS_TREE_RELOC_OBJECTID &&
1849                     header_transid == trans->transid &&
1850                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
1851                         clean_tree_block(NULL, root, buf);
1852                         btrfs_tree_unlock(buf);
1853                         free_extent_buffer(buf);
1854                         return 1;
1855                 }
1856                 btrfs_tree_unlock(buf);
1857         }
1858         free_extent_buffer(buf);
1859 pinit:
1860         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
1861
1862         BUG_ON(err < 0);
1863         return 0;
1864 }
1865
/*
 * remove an extent from the root, returns 0 on success
 *
 * Drops one reference on the extent at [bytenr, bytenr + num_bytes).  When
 * the reference count reaches zero the extent item is deleted from the
 * extent tree, space accounting (super block, root item, block group) is
 * updated, and the range is pinned or marked free depending on 'pin' and
 * 'mark_free'.  Caller must hold fs_info->alloc_mutex.
 */
static int __free_extent(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 bytenr, u64 num_bytes, u64 parent,
                         u64 root_objectid, u64 ref_generation,
                         u64 owner_objectid, u64 owner_offset,
                         int pin, int mark_free)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_root *extent_root = info->extent_root;
        struct extent_buffer *leaf;
        int ret;
        int extent_slot = 0;
        int found_extent = 0;
        int num_to_del = 1;
        struct btrfs_extent_item *ei;
        u32 refs;

        WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
        key.objectid = bytenr;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        key.offset = num_bytes;
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->reada = 1;
        ret = lookup_extent_backref(trans, extent_root, path, bytenr, parent,
                                    root_objectid, ref_generation, 1);
        if (ret == 0) {
                struct btrfs_key found_key;
                extent_slot = path->slots[0];
                /*
                 * the extent item usually sits shortly before its back refs
                 * in the same leaf; scan a few slots backwards so both items
                 * can be removed with a single btrfs_del_items call below
                 */
                while(extent_slot > 0) {
                        extent_slot--;
                        btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                              extent_slot);
                        if (found_key.objectid != bytenr)
                                break;
                        if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
                            found_key.offset == num_bytes) {
                                found_extent = 1;
                                break;
                        }
                        /* give up after a short scan; fall back to a search */
                        if (path->slots[0] - extent_slot > 5)
                                break;
                }
                if (!found_extent) {
                        /* back ref is elsewhere: delete it now, then search
                         * for the extent item itself
                         */
                        ret = remove_extent_backref(trans, extent_root, path);
                        BUG_ON(ret);
                        btrfs_release_path(extent_root, path);
                        ret = btrfs_search_slot(trans, extent_root,
                                                &key, path, -1, 1);
                        BUG_ON(ret);
                        extent_slot = path->slots[0];
                }
        } else {
                btrfs_print_leaf(extent_root, path->nodes[0]);
                WARN_ON(1);
                printk("Unable to find ref byte nr %Lu root %Lu "
                       " gen %Lu owner %Lu offset %Lu\n", bytenr,
                       root_objectid, ref_generation, owner_objectid,
                       owner_offset);
        }

        /* drop one reference from the extent item */
        leaf = path->nodes[0];
        ei = btrfs_item_ptr(leaf, extent_slot,
                            struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, ei);
        BUG_ON(refs == 0);
        refs -= 1;
        btrfs_set_extent_refs(leaf, ei, refs);

        btrfs_mark_buffer_dirty(leaf);

        if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
                struct btrfs_extent_ref *ref;
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_ref);
                BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
                /* if the back ref and the extent are next to each other
                 * they get deleted below in one shot
                 */
                path->slots[0] = extent_slot;
                num_to_del = 2;
        } else if (found_extent) {
                /* otherwise delete the extent back ref */
                ret = remove_extent_backref(trans, extent_root, path);
                BUG_ON(ret);
                /* if refs are 0, we need to setup the path for deletion */
                if (refs == 0) {
                        btrfs_release_path(extent_root, path);
                        ret = btrfs_search_slot(trans, extent_root, &key, path,
                                                -1, 1);
                        BUG_ON(ret);
                }
        }

        if (refs == 0) {
                u64 super_used;
                u64 root_used;
#ifdef BIO_RW_DISCARD
                u64 map_length = num_bytes;
                struct btrfs_multi_bio *multi = NULL;
#endif

                if (pin) {
                        /* pin_down_bytes returns 1 when the block can be
                         * reused directly instead of being pinned
                         */
                        ret = pin_down_bytes(trans, root, bytenr, num_bytes,
                                owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
                        if (ret > 0)
                                mark_free = 1;
                        BUG_ON(ret < 0);
                }

                /* block accounting for super block */
                spin_lock_irq(&info->delalloc_lock);
                super_used = btrfs_super_bytes_used(&info->super_copy);
                btrfs_set_super_bytes_used(&info->super_copy,
                                           super_used - num_bytes);
                spin_unlock_irq(&info->delalloc_lock);

                /* block accounting for root item */
                root_used = btrfs_root_used(&root->root_item);
                btrfs_set_root_used(&root->root_item,
                                           root_used - num_bytes);
                ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
                                      num_to_del);
                BUG_ON(ret);
                ret = update_block_group(trans, root, bytenr, num_bytes, 0,
                                         mark_free);
                BUG_ON(ret);

#ifdef BIO_RW_DISCARD
                /* Tell the block device(s) that the sectors can be discarded */
                ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
                                      bytenr, &map_length, &multi, 0);
                if (!ret) {
                        struct btrfs_bio_stripe *stripe = multi->stripes;
                        int i;

                        if (map_length > num_bytes)
                                map_length = num_bytes;

                        for (i = 0; i < multi->num_stripes; i++, stripe++) {
                                blkdev_issue_discard(stripe->dev->bdev,
                                                     stripe->physical >> 9,
                                                     map_length >> 9);
                        }
                        kfree(multi);
                }
#endif
        }
        btrfs_free_path(path);
        finish_current_insert(trans, extent_root);
        return ret;
}
2025
/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 *
 * Each EXTENT_LOCKED range in pending_del carries a pending_extent_op.
 * If the same range is NOT also queued for insertion in extent_ins, the
 * extent really exists on disk and is freed via __free_extent.  If it IS
 * queued in extent_ins, the insert and delete cancel out: both queue
 * entries are dropped and only the block group accounting is updated
 * (unless the queued op was a back ref update, in which case the extent
 * does exist and must still be freed).
 * Caller must hold fs_info->alloc_mutex.
 */
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
                               btrfs_root *extent_root)
{
        int ret;
        int err = 0;
        int mark_free = 0;
        u64 start;
        u64 end;
        u64 priv;
        struct extent_io_tree *pending_del;
        struct extent_io_tree *extent_ins;
        struct pending_extent_op *extent_op;

        WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
        extent_ins = &extent_root->fs_info->extent_ins;
        pending_del = &extent_root->fs_info->pending_del;

        while(1) {
                ret = find_first_extent_bit(pending_del, 0, &start, &end,
                                            EXTENT_LOCKED);
                if (ret)
                        break;

                ret = get_state_private(pending_del, start, &priv);
                BUG_ON(ret);
                extent_op = (struct pending_extent_op *)(unsigned long)priv;

                clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
                                  GFP_NOFS);

                /* ret > 0 means the block can be reused instead of pinned */
                ret = pin_down_bytes(trans, extent_root, start,
                                     end + 1 - start, 0);
                mark_free = ret > 0;
                if (!test_range_bit(extent_ins, start, end,
                                    EXTENT_LOCKED, 0)) {
free_extent:
                        ret = __free_extent(trans, extent_root,
                                            start, end + 1 - start,
                                            extent_op->orig_parent,
                                            extent_root->root_key.objectid,
                                            extent_op->orig_generation,
                                            extent_op->level, 0, 0, mark_free);
                        kfree(extent_op);
                } else {
                        kfree(extent_op);
                        /* fetch the op queued on the insert side */
                        ret = get_state_private(extent_ins, start, &priv);
                        BUG_ON(ret);
                        extent_op = (struct pending_extent_op *)
                                                        (unsigned long)priv;

                        clear_extent_bits(extent_ins, start, end,
                                          EXTENT_LOCKED, GFP_NOFS);

                        /* a back ref update means the extent is real and
                         * must still be freed
                         */
                        if (extent_op->type == PENDING_BACKREF_UPDATE)
                                goto free_extent;

                        /* insert and delete cancel; only fix accounting */
                        ret = update_block_group(trans, extent_root, start,
                                                end + 1 - start, 0, mark_free);
                        BUG_ON(ret);
                        kfree(extent_op);
                }
                if (ret)
                        err = ret;

                if (need_resched()) {
                        mutex_unlock(&extent_root->fs_info->alloc_mutex);
                        cond_resched();
                        mutex_lock(&extent_root->fs_info->alloc_mutex);
                }
        }
        return err;
}
2102
/*
 * remove an extent from the root, returns 0 on success
 *
 * Frees made against the extent tree itself are only queued (the extent
 * tree is being modified right now, so the real work is deferred to
 * del_pending_extents).  Metadata and cross-transaction frees force the
 * range to be pinned until commit; log tree metadata is handed straight
 * back to the free space cache.  Caller must hold fs_info->alloc_mutex.
 */
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               u64 bytenr, u64 num_bytes, u64 parent,
                               u64 root_objectid, u64 ref_generation,
                               u64 owner_objectid, u64 owner_offset, int pin)
{
        struct btrfs_root *extent_root = root->fs_info->extent_root;
        int pending_ret;
        int ret;

        WARN_ON(num_bytes < root->sectorsize);
        if (root == extent_root) {
                /* defer: queue a pending delete instead of recursing into
                 * the extent tree we are already modifying
                 */
                struct pending_extent_op *extent_op;

                extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
                BUG_ON(!extent_op);

                extent_op->type = PENDING_EXTENT_DELETE;
                extent_op->bytenr = bytenr;
                extent_op->num_bytes = num_bytes;
                extent_op->parent = parent;
                extent_op->orig_parent = parent;
                extent_op->generation = ref_generation;
                extent_op->orig_generation = ref_generation;
                /* for extent tree blocks, owner_objectid is the level */
                extent_op->level = (int)owner_objectid;

                set_extent_bits(&root->fs_info->pending_del,
                                bytenr, bytenr + num_bytes - 1,
                                EXTENT_LOCKED, GFP_NOFS);
                set_state_private(&root->fs_info->pending_del,
                                  bytenr, (unsigned long)extent_op);
                return 0;
        }
        /* if metadata always pin */
        if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
                if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
                        struct btrfs_block_group_cache *cache;

                        /* btrfs_free_reserved_extent */
                        cache = btrfs_lookup_block_group(root->fs_info, bytenr);
                        BUG_ON(!cache);
                        btrfs_add_free_space(cache, bytenr, num_bytes);
                        update_reserved_extents(root, bytenr, num_bytes, 0);
                        return 0;
                }
                pin = 1;
        }

        /* if data pin when any transaction has committed this */
        if (ref_generation != trans->transid)
                pin = 1;

        ret = __free_extent(trans, root, bytenr, num_bytes, parent,
                            root_objectid, ref_generation, owner_objectid,
                            owner_offset, pin, pin == 0);

        /* flush any inserts/deletes this free may have queued */
        finish_current_insert(trans, root->fs_info->extent_root);
        pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
        return ret ? ret : pending_ret;
}
2166
2167 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2168                       struct btrfs_root *root,
2169                       u64 bytenr, u64 num_bytes, u64 parent,
2170                       u64 root_objectid, u64 ref_generation,
2171                       u64 owner_objectid, u64 owner_offset, int pin)
2172 {
2173         int ret;
2174
2175         maybe_lock_mutex(root);
2176         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
2177                                   root_objectid, ref_generation,
2178                                   owner_objectid, owner_offset, pin);
2179         maybe_unlock_mutex(root);
2180         return ret;
2181 }
2182
2183 static u64 stripe_align(struct btrfs_root *root, u64 val)
2184 {
2185         u64 mask = ((u64)root->stripesize - 1);
2186         u64 ret = (val + mask) & ~mask;
2187         return ret;
2188 }
2189
2190 /*
2191  * walks the btree of allocated extents and find a hole of a given size.
2192  * The key ins is changed to record the hole:
2193  * ins->objectid == block start
2194  * ins->flags = BTRFS_EXTENT_ITEM_KEY
2195  * ins->offset == number of blocks
2196  * Any available blocks before search_start are skipped.
2197  */
2198 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2199                                      struct btrfs_root *orig_root,
2200                                      u64 num_bytes, u64 empty_size,
2201                                      u64 search_start, u64 search_end,
2202                                      u64 hint_byte, struct btrfs_key *ins,
2203                                      u64 exclude_start, u64 exclude_nr,
2204                                      int data)
2205 {
2206         int ret;
2207         u64 orig_search_start;
2208         struct btrfs_root * root = orig_root->fs_info->extent_root;
2209         struct btrfs_fs_info *info = root->fs_info;
2210         u64 total_needed = num_bytes;
2211         u64 *last_ptr = NULL;
2212         struct btrfs_block_group_cache *block_group;
2213         int chunk_alloc_done = 0;
2214         int empty_cluster = 2 * 1024 * 1024;
2215         int allowed_chunk_alloc = 0;
2216
2217         WARN_ON(num_bytes < root->sectorsize);
2218         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2219
2220         if (orig_root->ref_cows || empty_size)
2221                 allowed_chunk_alloc = 1;
2222
2223         if (data & BTRFS_BLOCK_GROUP_METADATA) {
2224                 last_ptr = &root->fs_info->last_alloc;
2225                 empty_cluster = 256 * 1024;
2226         }
2227
2228         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
2229                 last_ptr = &root->fs_info->last_data_alloc;
2230
2231         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2232                 last_ptr = &root->fs_info->last_log_alloc;
2233                 if (!last_ptr == 0 && root->fs_info->last_alloc) {
2234                         *last_ptr = root->fs_info->last_alloc + empty_cluster;
2235                 }
2236         }
2237
2238         if (last_ptr) {
2239                 if (*last_ptr)
2240                         hint_byte = *last_ptr;
2241                 else
2242                         empty_size += empty_cluster;
2243         }
2244
2245         search_start = max(search_start, first_logical_byte(root, 0));
2246         orig_search_start = search_start;
2247
2248         search_start = max(search_start, hint_byte);
2249         total_needed += empty_size;
2250
2251 new_group:
2252         block_group = btrfs_lookup_block_group(info, search_start);
2253         if (!block_group)
2254                 block_group = btrfs_lookup_first_block_group(info,
2255                                                              search_start);
2256
2257         /*
2258          * Ok this looks a little tricky, buts its really simple.  First if we
2259          * didn't find a block group obviously we want to start over.
2260          * Secondly, if the block group we found does not match the type we
2261          * need, and we have a last_ptr and its not 0, chances are the last
2262          * allocation we made was at the end of the block group, so lets go
2263          * ahead and skip the looking through the rest of the block groups and
2264          * start at the beginning.  This helps with metadata allocations,
2265          * since you are likely to have a bunch of data block groups to search
2266          * through first before you realize that you need to start over, so go
2267          * ahead and start over and save the time.
2268          */
2269         if (!block_group || (!block_group_bits(block_group, data) &&
2270                              last_ptr && *last_ptr)) {
2271                 if (search_start != orig_search_start) {
2272                         if (last_ptr && *last_ptr) {
2273                                 total_needed += empty_cluster;
2274                                 *last_ptr = 0;
2275                         }
2276                         search_start = orig_search_start;
2277                         goto new_group;
2278                 } else if (!chunk_alloc_done && allowed_chunk_alloc) {
2279                         ret = do_chunk_alloc(trans, root,
2280                                              num_bytes + 2 * 1024 * 1024,
2281                                              data, 1);
2282                         if (ret < 0)
2283                                 goto error;
2284                         BUG_ON(ret);
2285                         chunk_alloc_done = 1;
2286                         search_start = orig_search_start;
2287                         goto new_group;
2288                 } else {
2289                         ret = -ENOSPC;
2290                         goto error;
2291                 }
2292         }
2293
2294         /*
2295          * this is going to seach through all of the existing block groups it
2296          * can find, so if we don't find something we need to see if we can
2297          * allocate what we need.
2298          */
2299         ret = find_free_space(root, &block_group, &search_start,
2300                               total_needed, data);
2301         if (ret == -ENOSPC) {
2302                 /*
2303                  * instead of allocating, start at the original search start
2304                  * and see if there is something to be found, if not then we
2305                  * allocate
2306                  */
2307                 if (search_start != orig_search_start) {
2308                         if (last_ptr && *last_ptr) {
2309                                 *last_ptr = 0;
2310                                 total_needed += empty_cluster;
2311                         }
2312                         search_start = orig_search_start;
2313                         goto new_group;
2314                 }
2315
2316                 /*
2317                  * we've already allocated, we're pretty screwed
2318                  */
2319                 if (chunk_alloc_done) {
2320                         goto error;
2321                 } else if (!allowed_chunk_alloc && block_group &&
2322                            block_group_bits(block_group, data)) {
2323                         block_group->space_info->force_alloc = 1;
2324                         goto error;
2325                 } else if (!allowed_chunk_alloc) {
2326                         goto error;
2327                 }
2328
2329                 ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024,
2330                                      data, 1);
2331                 if (ret < 0)
2332                         goto error;
2333
2334                 BUG_ON(ret);
2335                 chunk_alloc_done = 1;
2336                 if (block_group)
2337                         search_start = block_group->key.objectid +
2338                                 block_group->key.offset;
2339                 else
2340                         search_start = orig_search_start;
2341                 goto new_group;
2342         }
2343
2344         if (ret)
2345                 goto error;
2346
2347         search_start = stripe_align(root, search_start);
2348         ins->objectid = search_start;
2349         ins->offset = num_bytes;
2350
2351         if (ins->objectid + num_bytes >= search_end) {
2352                 search_start = orig_search_start;
2353                 if (chunk_alloc_done) {
2354                         ret = -ENOSPC;
2355                         goto error;
2356                 }
2357                 goto new_group;
2358         }
2359
2360         if (ins->objectid + num_bytes >
2361             block_group->key.objectid + block_group->key.offset) {
2362                 if (search_start == orig_search_start && chunk_alloc_done) {
2363                         ret = -ENOSPC;
2364                         goto error;
2365                 }
2366                 search_start = block_group->key.objectid +
2367                         block_group->key.offset;
2368                 goto new_group;
2369         }
2370
2371         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
2372             ins->objectid < exclude_start + exclude_nr)) {
2373                 search_start = exclude_start + exclude_nr;
2374                 goto new_group;
2375         }
2376
2377         if (!(data & BTRFS_BLOCK_GROUP_DATA))
2378                 trans->block_group = block_group;
2379
2380         ins->offset = num_bytes;
2381         if (last_ptr) {
2382                 *last_ptr = ins->objectid + ins->offset;
2383                 if (*last_ptr ==
2384                     btrfs_super_total_bytes(&root->fs_info->super_copy))
2385                         *last_ptr = 0;
2386         }
2387
2388         ret = 0;
2389 error:
2390         return ret;
2391 }
2392
2393 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2394 {
2395         struct btrfs_block_group_cache *cache;
2396         struct list_head *l;
2397
2398         printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
2399                info->total_bytes - info->bytes_used - info->bytes_pinned -
2400                info->bytes_reserved, (info->full) ? "" : "not ");
2401
2402         spin_lock(&info->lock);
2403         list_for_each(l, &info->block_groups) {
2404                 cache = list_entry(l, struct btrfs_block_group_cache, list);
2405                 spin_lock(&cache->lock);
2406                 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
2407                        "%Lu pinned %Lu reserved\n",
2408                        cache->key.objectid, cache->key.offset,
2409                        btrfs_block_group_used(&cache->item),
2410                        cache->pinned, cache->reserved);
2411                 btrfs_dump_free_space(cache, bytes);
2412                 spin_unlock(&cache->lock);
2413         }
2414         spin_unlock(&info->lock);
2415 }
2416
/*
 * Reserve a free extent of at least min_alloc_size (preferably num_bytes)
 * bytes.  On success the chosen extent is returned through *ins
 * (objectid = start, offset = length) and removed from the in-memory
 * free space cache.  'data' selects data vs metadata vs system
 * allocation; the matching profile bits are folded in below.
 *
 * Expects the caller to hold the allocation mutex (all callers in this
 * file go through maybe_lock_mutex()).
 */
static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
                                  u64 num_bytes, u64 min_alloc_size,
                                  u64 empty_size, u64 hint_byte,
                                  u64 search_end, struct btrfs_key *ins,
                                  u64 data)
{
        int ret;
        u64 search_start = 0;
        u64 alloc_profile;
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_block_group_cache *cache;

        /*
         * Turn the caller's 'data' flag into the full block group type +
         * RAID profile bits: data, system (when allocating for the chunk
         * tree itself) or metadata.
         */
        if (data) {
                alloc_profile = info->avail_data_alloc_bits &
                                info->data_alloc_profile;
                data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
        } else if (root == root->fs_info->chunk_root) {
                alloc_profile = info->avail_system_alloc_bits &
                                info->system_alloc_profile;
                data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
        } else {
                alloc_profile = info->avail_metadata_alloc_bits &
                                info->metadata_alloc_profile;
                data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
        }
again:
        /* drop profile bits we can no longer satisfy (e.g. degraded) */
        data = reduce_alloc_profile(root, data);
        /*
         * the only place that sets empty_size is btrfs_realloc_node, which
         * is not called recursively on allocations
         */
        if (empty_size || root->ref_cows) {
                /*
                 * Pre-allocate a metadata chunk too when doing a data
                 * allocation for a COW root, so metadata doesn't run dry
                 * in the middle of the operation.
                 */
                if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                     2 * 1024 * 1024,
                                     BTRFS_BLOCK_GROUP_METADATA |
                                     (info->metadata_alloc_profile &
                                      info->avail_metadata_alloc_bits), 0);
                }
                ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                     num_bytes + 2 * 1024 * 1024, data, 0);
        }

        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(trans, root, num_bytes, empty_size,
                               search_start, search_end, hint_byte, ins,
                               trans->alloc_exclude_start,
                               trans->alloc_exclude_nr, data);

        /*
         * ENOSPC with a big request: halve the size (sector aligned, but
         * never below min_alloc_size), force a chunk allocation and retry.
         */
        if (ret == -ENOSPC && num_bytes > min_alloc_size) {
                num_bytes = num_bytes >> 1;
                num_bytes = num_bytes & ~(root->sectorsize - 1);
                num_bytes = max(num_bytes, min_alloc_size);
                do_chunk_alloc(trans, root->fs_info->extent_root,
                               num_bytes, data, 1);
                goto again;
        }
        /* out of space even at min_alloc_size: dump state and die */
        if (ret) {
                struct btrfs_space_info *sinfo;

                sinfo = __find_space_info(root->fs_info, data);
                printk("allocation failed flags %Lu, wanted %Lu\n",
                       data, num_bytes);
                dump_space_info(sinfo, num_bytes);
                BUG();
        }
        cache = btrfs_lookup_block_group(root->fs_info, ins->objectid);
        if (!cache) {
                printk(KERN_ERR "Unable to find block group for %Lu\n", ins->objectid);
                return -ENOSPC;
        }

        /* claim the range in the free space cache so nobody else gets it */
        ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset);

        return ret;
}
2494
2495 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
2496 {
2497         struct btrfs_block_group_cache *cache;
2498
2499         maybe_lock_mutex(root);
2500         cache = btrfs_lookup_block_group(root->fs_info, start);
2501         if (!cache) {
2502                 printk(KERN_ERR "Unable to find block group for %Lu\n", start);
2503                 maybe_unlock_mutex(root);
2504                 return -ENOSPC;
2505         }
2506         btrfs_add_free_space(cache, start, len);
2507         update_reserved_extents(root, start, len, 0);
2508         maybe_unlock_mutex(root);
2509         return 0;
2510 }
2511
2512 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2513                                   struct btrfs_root *root,
2514                                   u64 num_bytes, u64 min_alloc_size,
2515                                   u64 empty_size, u64 hint_byte,
2516                                   u64 search_end, struct btrfs_key *ins,
2517                                   u64 data)
2518 {
2519         int ret;
2520         maybe_lock_mutex(root);
2521         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2522                                      empty_size, hint_byte, search_end, ins,
2523                                      data);
2524         update_reserved_extents(root, ins->objectid, ins->offset, 1);
2525         maybe_unlock_mutex(root);
2526         return ret;
2527 }
2528
/*
 * Turn an already-reserved extent (described by *ins) into a real
 * allocation: update the super block and root item byte accounting and
 * insert the extent item plus its first backref into the extent tree.
 *
 * When the allocation is for the extent tree itself we cannot recurse
 * into it, so the insert is queued as a pending_extent_op and performed
 * later by finish_current_insert().
 *
 * Expects the caller to hold the allocation mutex.
 */
static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
                                         struct btrfs_root *root, u64 parent,
                                         u64 root_objectid, u64 ref_generation,
                                         u64 owner, u64 owner_offset,
                                         struct btrfs_key *ins)
{
        int ret;
        int pending_ret;
        u64 super_used;
        u64 root_used;
        u64 num_bytes = ins->offset;
        u32 sizes[2];
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_root *extent_root = info->extent_root;
        struct btrfs_extent_item *extent_item;
        struct btrfs_extent_ref *ref;
        struct btrfs_path *path;
        struct btrfs_key keys[2];

        /* parent == 0 means the extent is its own backref parent */
        if (parent == 0)
                parent = ins->objectid;

        /* block accounting for super block */
        spin_lock_irq(&info->delalloc_lock);
        super_used = btrfs_super_bytes_used(&info->super_copy);
        btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
        spin_unlock_irq(&info->delalloc_lock);

        /* block accounting for root item */
        root_used = btrfs_root_used(&root->root_item);
        btrfs_set_root_used(&root->root_item, root_used + num_bytes);

        /*
         * Allocating for the extent tree: defer the insert via a pending
         * op recorded in the extent_ins state tree instead of modifying
         * the extent tree while we are allocating from it.
         */
        if (root == extent_root) {
                struct pending_extent_op *extent_op;

                extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
                BUG_ON(!extent_op);

                extent_op->type = PENDING_EXTENT_INSERT;
                extent_op->bytenr = ins->objectid;
                extent_op->num_bytes = ins->offset;
                extent_op->parent = parent;
                extent_op->orig_parent = 0;
                extent_op->generation = ref_generation;
                extent_op->orig_generation = 0;
                extent_op->level = (int)owner;

                set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
                                ins->objectid + ins->offset - 1,
                                EXTENT_LOCKED, GFP_NOFS);
                set_state_private(&root->fs_info->extent_ins,
                                  ins->objectid, (unsigned long)extent_op);
                goto update_block;
        }

        /*
         * Insert the extent item and the backref item in one shot:
         * keys[0] is the extent item (copied from *ins), keys[1] is the
         * EXTENT_REF keyed by the parent block.
         */
        memcpy(&keys[0], ins, sizeof(*ins));
        keys[1].objectid = ins->objectid;
        keys[1].type = BTRFS_EXTENT_REF_KEY;
        keys[1].offset = parent;
        sizes[0] = sizeof(*extent_item);
        sizes[1] = sizeof(*ref);

        path = btrfs_alloc_path();
        BUG_ON(!path);

        ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
                                       sizes, 2);
        BUG_ON(ret);

        extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                     struct btrfs_extent_item);
        btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
        ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
                             struct btrfs_extent_ref);

        btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
        btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
        btrfs_set_ref_objectid(path->nodes[0], ref, owner);
        btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
        btrfs_set_ref_num_refs(path->nodes[0], ref, 1);

        btrfs_mark_buffer_dirty(path->nodes[0]);

        /* the extent is inserted; stop excluding it from allocation */
        trans->alloc_exclude_start = 0;
        trans->alloc_exclude_nr = 0;
        btrfs_free_path(path);
        finish_current_insert(trans, extent_root);
        pending_ret = del_pending_extents(trans, extent_root);

        if (ret)
                goto out;
        if (pending_ret) {
                ret = pending_ret;
                goto out;
        }

update_block:
        /* record the used bytes in the owning block group */
        ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
        if (ret) {
                printk("update block group failed for %Lu %Lu\n",
                       ins->objectid, ins->offset);
                BUG();
        }
out:
        return ret;
}
2635
/*
 * Public wrapper: convert a reserved extent into an allocated one (see
 * __btrfs_alloc_reserved_extent) and clear its reserved bits.
 *
 * Tree-log extents are a no-op here; they are handled at log replay via
 * btrfs_alloc_logged_extent().
 */
int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root, u64 parent,
                                u64 root_objectid, u64 ref_generation,
                                u64 owner, u64 owner_offset,
                                struct btrfs_key *ins)
{
        int ret;

        if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
                return 0;
        maybe_lock_mutex(root);
        ret = __btrfs_alloc_reserved_extent(trans, root, parent,
                                            root_objectid, ref_generation,
                                            owner, owner_offset, ins);
        /* final arg 0: drop the reserved state now that it's allocated */
        update_reserved_extents(root, ins->objectid, ins->offset, 0);
        maybe_unlock_mutex(root);
        return ret;
}
2654
2655 /*
2656  * this is used by the tree logging recovery code.  It records that
2657  * an extent has been allocated and makes sure to clear the free
2658  * space cache bits as well
2659  */
int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root, u64 parent,
                                u64 root_objectid, u64 ref_generation,
                                u64 owner, u64 owner_offset,
                                struct btrfs_key *ins)
{
        int ret;
        struct btrfs_block_group_cache *block_group;

        maybe_lock_mutex(root);
        /*
         * NOTE(review): btrfs_lookup_block_group() can return NULL, and
         * block_group is used unchecked below — a missing group would
         * oops in cache_block_group().  Presumably log replay only hands
         * us extents inside existing block groups; confirm, or add a
         * BUG_ON(!block_group).
         */
        block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
        /* make sure the group's free space cache is populated first */
        cache_block_group(root, block_group);

        /* the logged extent is in use: pull it out of the free space cache */
        ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset);
        BUG_ON(ret);
        ret = __btrfs_alloc_reserved_extent(trans, root, parent,
                                            root_objectid, ref_generation,
                                            owner, owner_offset, ins);
        maybe_unlock_mutex(root);
        return ret;
}
2681
2682 /*
2683  * finds a free extent and does all the dirty work required for allocation
2684  * returns the key for the extent through ins, and a tree buffer for
2685  * the first block of the extent through buf.
2686  *
2687  * returns 0 if everything worked, non-zero otherwise.
2688  */
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root,
                       u64 num_bytes, u64 parent, u64 min_alloc_size,
                       u64 root_objectid, u64 ref_generation,
                       u64 owner_objectid, u64 owner_offset,
                       u64 empty_size, u64 hint_byte,
                       u64 search_end, struct btrfs_key *ins, u64 data)
{
        int ret;

        maybe_lock_mutex(root);

        /* reserve the space first; *ins describes the extent on success */
        ret = __btrfs_reserve_extent(trans, root, num_bytes,
                                     min_alloc_size, empty_size, hint_byte,
                                     search_end, ins, data);
        BUG_ON(ret);
        if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
                /* normal tree: insert the extent + backref right away */
                ret = __btrfs_alloc_reserved_extent(trans, root, parent,
                                        root_objectid, ref_generation,
                                        owner_objectid, owner_offset, ins);
                BUG_ON(ret);

        } else {
                /*
                 * tree-log extents stay "reserved" only; the real extent
                 * items are created at log commit/replay time
                 */
                update_reserved_extents(root, ins->objectid, ins->offset, 1);
        }
        maybe_unlock_mutex(root);
        return ret;
}
2717
2718 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
2719                                             struct btrfs_root *root,
2720                                             u64 bytenr, u32 blocksize)
2721 {
2722         struct extent_buffer *buf;
2723
2724         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
2725         if (!buf)
2726                 return ERR_PTR(-ENOMEM);
2727         btrfs_set_header_generation(buf, trans->transid);
2728         btrfs_tree_lock(buf);
2729         clean_tree_block(trans, root, buf);
2730         btrfs_set_buffer_uptodate(buf);
2731         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2732                 set_extent_dirty(&root->dirty_log_pages, buf->start,
2733                          buf->start + buf->len - 1, GFP_NOFS);
2734         } else {
2735                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2736                          buf->start + buf->len - 1, GFP_NOFS);
2737         }
2738         trans->blocks_used++;
2739         return buf;
2740 }
2741
2742 /*
2743  * helper function to allocate a block for a given tree
2744  * returns the tree buffer or NULL.
2745  */
2746 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2747                                              struct btrfs_root *root,
2748                                              u32 blocksize, u64 parent,
2749                                              u64 root_objectid,
2750                                              u64 ref_generation,
2751                                              int level,
2752                                              u64 hint,
2753                                              u64 empty_size)
2754 {
2755         struct btrfs_key ins;
2756         int ret;
2757         struct extent_buffer *buf;
2758
2759         ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
2760                                  root_objectid, ref_generation, level, 0,
2761                                  empty_size, hint, (u64)-1, &ins, 0);
2762         if (ret) {
2763                 BUG_ON(ret > 0);
2764                 return ERR_PTR(ret);
2765         }
2766
2767         buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
2768         return buf;
2769 }
2770
/*
 * Drop one reference on every file extent referenced by 'leaf'.  Used
 * while tearing down a snapshot: each regular (non-inline, non-hole)
 * EXTENT_DATA item in the leaf gets __btrfs_free_extent() called on its
 * disk extent.  Always returns 0; individual failures BUG().
 */
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root, struct extent_buffer *leaf)
{
        u64 leaf_owner;
        u64 leaf_generation;
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        int i;
        int nritems;
        int ret;

        BUG_ON(!btrfs_is_leaf(leaf));
        nritems = btrfs_header_nritems(leaf);
        leaf_owner = btrfs_header_owner(leaf);
        leaf_generation = btrfs_header_generation(leaf);

        for (i = 0; i < nritems; i++) {
                u64 disk_bytenr;
                cond_resched();

                btrfs_item_key_to_cpu(leaf, &key, i);
                /* only file extent items carry disk extents */
                if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
                        continue;
                fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
                /* inline extents live in the leaf itself, nothing to free */
                if (btrfs_file_extent_type(leaf, fi) ==
                    BTRFS_FILE_EXTENT_INLINE)
                        continue;
                /*
                 * FIXME make sure to insert a trans record that
                 * repeats the snapshot del on crash
                 */
                disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                /* bytenr 0 is a hole */
                if (disk_bytenr == 0)
                        continue;

                /* alloc_mutex protects the extent tree modification */
                mutex_lock(&root->fs_info->alloc_mutex);
                ret = __btrfs_free_extent(trans, root, disk_bytenr,
                                btrfs_file_extent_disk_num_bytes(leaf, fi),
                                leaf->start, leaf_owner, leaf_generation,
                                key.objectid, key.offset, 0);
                mutex_unlock(&root->fs_info->alloc_mutex);
                BUG_ON(ret);

                /* let waiting transaction committers make progress */
                atomic_inc(&root->fs_info->throttle_gen);
                wake_up(&root->fs_info->transaction_throttle);
                cond_resched();
        }
        return 0;
}
2820
2821 static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
2822                                         struct btrfs_root *root,
2823                                         struct btrfs_leaf_ref *ref)
2824 {
2825         int i;
2826         int ret;
2827         struct btrfs_extent_info *info = ref->extents;
2828
2829         for (i = 0; i < ref->nritems; i++) {
2830                 mutex_lock(&root->fs_info->alloc_mutex);
2831                 ret = __btrfs_free_extent(trans, root, info->bytenr,
2832                                           info->num_bytes, ref->bytenr,
2833                                           ref->owner, ref->generation,
2834                                           info->objectid, info->offset, 0);
2835                 mutex_unlock(&root->fs_info->alloc_mutex);
2836
2837                 atomic_inc(&root->fs_info->throttle_gen);
2838                 wake_up(&root->fs_info->transaction_throttle);
2839                 cond_resched();
2840
2841                 BUG_ON(ret);
2842                 info++;
2843         }
2844
2845         return 0;
2846 }
2847
/*
 * Look up the reference count of the extent [start, start + len) for the
 * snapshot-drop code, returning it through *refs.  The lookup itself
 * must succeed (BUG on error); the disabled block below is old debug
 * code kept for reference.
 */
int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
                              u32 *refs)
{
        int ret;

        ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
        BUG_ON(ret);

#if 0 // some debugging code in case we see problems here
        /* if the refs count is one, it won't get increased again.  But
         * if the ref count is > 1, someone may be decreasing it at
         * the same time we are.
         */
        if (*refs != 1) {
                struct extent_buffer *eb = NULL;
                eb = btrfs_find_create_tree_block(root, start, len);
                if (eb)
                        btrfs_tree_lock(eb);

                mutex_lock(&root->fs_info->alloc_mutex);
                ret = lookup_extent_ref(NULL, root, start, len, refs);
                BUG_ON(ret);
                mutex_unlock(&root->fs_info->alloc_mutex);

                if (eb) {
                        btrfs_tree_unlock(eb);
                        free_extent_buffer(eb);
                }
                if (*refs == 1) {
                        printk("block %llu went down to one during drop_snap\n",
                               (unsigned long long)start);
                }

        }
#endif

        /* this runs in long loops from drop_snapshot; be preemption friendly */
        cond_resched();
        return ret;
}
2887
2888 /*
2889  * helper function for drop_snapshot, this walks down the tree dropping ref
2890  * counts as it goes.
2891  */
/*
 * Walk down from path->nodes[*level] freeing everything that is only
 * referenced by this (dead) root.  Blocks whose refcount is > 1 just get
 * one reference dropped; blocks with a single ref are descended into (or
 * their leaves freed via the leaf-ref cache) and then freed on the way
 * back out.  On return *level points at the next level the caller
 * (walk_up_tree) should continue from.
 */
static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct btrfs_path *path, int *level)
{
        u64 root_owner;
        u64 root_gen;
        u64 bytenr;
        u64 ptr_gen;
        struct extent_buffer *next;
        struct extent_buffer *cur;
        struct extent_buffer *parent;
        struct btrfs_leaf_ref *ref;
        u32 blocksize;
        int ret;
        u32 refs;

        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);
        ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
                                path->nodes[*level]->len, &refs);
        BUG_ON(ret);
        /* shared with another root: nothing below us is exclusively ours */
        if (refs > 1)
                goto out;

        /*
         * walk down to the last node level and free all the leaves
         */
        while(*level >= 0) {
                WARN_ON(*level < 0);
                WARN_ON(*level >= BTRFS_MAX_LEVEL);
                cur = path->nodes[*level];

                if (btrfs_header_level(cur) != *level)
                        WARN_ON(1);

                /* no more slots at this level: pop back up (below 'out') */
                if (path->slots[*level] >=
                    btrfs_header_nritems(cur))
                        break;
                if (*level == 0) {
                        ret = btrfs_drop_leaf_ref(trans, root, cur);
                        BUG_ON(ret);
                        break;
                }
                bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
                ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
                blocksize = btrfs_level_size(root, *level - 1);

                ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
                BUG_ON(ret);
                /*
                 * child is shared: just drop our reference on it and move
                 * to the next slot without descending
                 */
                if (refs != 1) {
                        parent = path->nodes[*level];
                        root_owner = btrfs_header_owner(parent);
                        root_gen = btrfs_header_generation(parent);
                        path->slots[*level]++;

                        mutex_lock(&root->fs_info->alloc_mutex);
                        ret = __btrfs_free_extent(trans, root, bytenr,
                                                blocksize, parent->start,
                                                root_owner, root_gen, 0, 0, 1);
                        BUG_ON(ret);
                        mutex_unlock(&root->fs_info->alloc_mutex);

                        atomic_inc(&root->fs_info->throttle_gen);
                        wake_up(&root->fs_info->transaction_throttle);
                        cond_resched();

                        continue;
                }
                /*
                 * at this point, we have a single ref, and since the
                 * only place referencing this extent is a dead root
                 * the reference count should never go higher.
                 * So, we don't need to check it again
                 */
                if (*level == 1) {
                        /*
                         * fast path: use the cached leaf ref to free the
                         * leaf's extents without reading the leaf block
                         */
                        ref = btrfs_lookup_leaf_ref(root, bytenr);
                        if (ref && ref->generation != ptr_gen) {
                                btrfs_free_leaf_ref(root, ref);
                                ref = NULL;
                        }
                        if (ref) {
                                ret = cache_drop_leaf_ref(trans, root, ref);
                                BUG_ON(ret);
                                btrfs_remove_leaf_ref(root, ref);
                                btrfs_free_leaf_ref(root, ref);
                                *level = 0;
                                break;
                        }
                        if (printk_ratelimit())
                                printk("leaf ref miss for bytenr %llu\n",
                                       (unsigned long long)bytenr);
                }
                /* slow path: read the child block and descend into it */
                next = btrfs_find_tree_block(root, bytenr, blocksize);
                if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
                        free_extent_buffer(next);

                        next = read_tree_block(root, bytenr, blocksize,
                                               ptr_gen);
                        cond_resched();
#if 0
                        /*
                         * this is a debugging check and can go away
                         * the ref should never go all the way down to 1
                         * at this point
                         */
                        ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
                                                &refs);
                        BUG_ON(ret);
                        WARN_ON(refs != 1);
#endif
                }
                WARN_ON(*level <= 0);
                if (path->nodes[*level-1])
                        free_extent_buffer(path->nodes[*level-1]);
                path->nodes[*level-1] = next;
                *level = btrfs_header_level(next);
                path->slots[*level] = 0;
                cond_resched();
        }
out:
        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);

        /*
         * everything below path->nodes[*level] is gone; free the block at
         * *level itself, using the root node or its parent slot to find
         * the owner/generation for the backref
         */
        if (path->nodes[*level] == root->node) {
                parent = path->nodes[*level];
                bytenr = path->nodes[*level]->start;
        } else {
                parent = path->nodes[*level + 1];
                bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
        }

        blocksize = btrfs_level_size(root, *level);
        root_owner = btrfs_header_owner(parent);
        root_gen = btrfs_header_generation(parent);

        mutex_lock(&root->fs_info->alloc_mutex);
        ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
                                  parent->start, root_owner, root_gen,
                                  0, 0, 1);
        mutex_unlock(&root->fs_info->alloc_mutex);
        free_extent_buffer(path->nodes[*level]);
        path->nodes[*level] = NULL;
        *level += 1;
        BUG_ON(ret);

        cond_resched();
        return 0;
}
3040
/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 *
 * returns 0 when a node with unvisited slots was found (*level updated
 * and drop_progress recorded so the drop can be resumed later), or 1
 * when the entire tree has been processed.
 */
static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	struct btrfs_root_item *root_item = &root->root_item;
	int i;
	int slot;
	int ret;

	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		slot = path->slots[i];
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			/* this node still has unvisited slots; advance to
			 * the next slot and record our position in the
			 * root item so btrfs_drop_snapshot can resume here
			 */
			node = path->nodes[i];
			path->slots[i]++;
			*level = i;
			WARN_ON(*level == 0);
			btrfs_node_key(node, &disk_key, path->slots[i]);
			memcpy(&root_item->drop_progress,
			       &disk_key, sizeof(disk_key));
			root_item->drop_level = i;
			return 0;
		} else {
			struct extent_buffer *parent;
			/* the tree root acts as its own parent for the
			 * backref passed to btrfs_free_extent
			 */
			if (path->nodes[*level] == root->node)
				parent = path->nodes[*level];
			else
				parent = path->nodes[*level + 1];

			/* every slot of this node was visited; free the
			 * block and move one level up
			 */
			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
			ret = btrfs_free_extent(trans, root,
						path->nodes[*level]->start,
						path->nodes[*level]->len,
						parent->start,
						root_owner, root_gen, 0, 0, 1);
			BUG_ON(ret);
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			*level = i + 1;
		}
	}
	return 1;
}
3093
/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 *
 * returns 0 when the tree is completely gone, -EAGAIN when the walk was
 * interrupted by a transaction commit (drop_progress in the root item
 * records where to resume), or a negative errno on search failure.
 */
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
			*root)
{
	int ret = 0;
	int wret;
	int level;
	struct btrfs_path *path;
	int i;
	int orig_level;
	struct btrfs_root_item *root_item = &root->root_item;

	/* the caller must hold drop_mutex across the whole drop */
	WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
	path = btrfs_alloc_path();
	BUG_ON(!path);

	level = btrfs_header_level(root->node);
	orig_level = level;
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* fresh drop: start walking from the tree root */
		path->nodes[level] = root->node;
		extent_buffer_get(root->node);
		path->slots[level] = 0;
	} else {
		/*
		 * resuming an interrupted drop: re-search down to the key
		 * and level recorded in drop_progress/drop_level
		 */
		struct btrfs_key key;
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path->lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (wret < 0) {
			ret = wret;
			goto out;
		}
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
		/*
		 * unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot
		 */
		for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
			if (path->nodes[i] && path->locks[i]) {
				path->locks[i] = 0;
				btrfs_tree_unlock(path->nodes[i]);
			}
		}
	}
	/*
	 * alternate between walking down (freeing blocks) and walking back
	 * up (finding the next unvisited slot) until the tree is gone
	 */
	while(1) {
		wret = walk_down_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;

		wret = walk_up_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;
		/* don't hold up a transaction commit; caller retries later */
		if (trans->transaction->in_commit) {
			ret = -EAGAIN;
			break;
		}
		atomic_inc(&root->fs_info->throttle_gen);
		wake_up(&root->fs_info->transaction_throttle);
	}
	/* drop any extent buffer references still held by the path */
	for (i = 0; i <= orig_level; i++) {
		if (path->nodes[i]) {
			free_extent_buffer(path->nodes[i]);
			path->nodes[i] = NULL;
		}
	}
out:
	btrfs_free_path(path);
	return ret;
}
3177
/*
 * compute the last page index for a readahead window of 'nr' pages
 * starting at 'start', clamped so it never runs past 'last'.
 */
static unsigned long calc_ra(unsigned long start, unsigned long last,
			     unsigned long nr)
{
	unsigned long end = start + nr - 1;

	return end < last ? end : last;
}
3183
3184 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
3185                                          u64 len)
3186 {
3187         u64 page_start;
3188         u64 page_end;
3189         unsigned long first_index;
3190         unsigned long last_index;
3191         unsigned long i;
3192         struct page *page;
3193         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3194         struct file_ra_state *ra;
3195         struct btrfs_ordered_extent *ordered;
3196         unsigned int total_read = 0;
3197         unsigned int total_dirty = 0;
3198         int ret = 0;
3199
3200         ra = kzalloc(sizeof(*ra), GFP_NOFS);
3201
3202         mutex_lock(&inode->i_mutex);
3203         first_index = start >> PAGE_CACHE_SHIFT;
3204         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3205
3206         /* make sure the dirty trick played by the caller work */
3207         ret = invalidate_inode_pages2_range(inode->i_mapping,
3208                                             first_index, last_index);
3209         if (ret)
3210                 goto out_unlock;
3211
3212         file_ra_state_init(ra, inode->i_mapping);
3213
3214         for (i = first_index ; i <= last_index; i++) {
3215                 if (total_read % ra->ra_pages == 0) {
3216                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3217                                        calc_ra(i, last_index, ra->ra_pages));
3218                 }
3219                 total_read++;
3220 again:
3221                 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
3222                         BUG_ON(1);
3223                 page = grab_cache_page(inode->i_mapping, i);
3224                 if (!page) {
3225                         ret = -ENOMEM;
3226                         goto out_unlock;
3227                 }
3228                 if (!PageUptodate(page)) {
3229                         btrfs_readpage(NULL, page);
3230                         lock_page(page);
3231                         if (!PageUptodate(page)) {
3232                                 unlock_page(page);
3233                                 page_cache_release(page);
3234                                 ret = -EIO;
3235                                 goto out_unlock;
3236                         }
3237                 }
3238                 wait_on_page_writeback(page);
3239
3240                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3241                 page_end = page_start + PAGE_CACHE_SIZE - 1;
3242                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3243
3244                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3245                 if (ordered) {
3246                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3247                         unlock_page(page);
3248                         page_cache_release(page);
3249                         btrfs_start_ordered_extent(inode, ordered, 1);
3250                         btrfs_put_ordered_extent(ordered);
3251                         goto again;
3252                 }
3253                 set_page_extent_mapped(page);
3254
3255                 btrfs_set_extent_delalloc(inode, page_start, page_end);
3256                 if (i == first_index)
3257                         set_extent_bits(io_tree, page_start, page_end,
3258                                         EXTENT_BOUNDARY, GFP_NOFS);
3259
3260                 set_page_dirty(page);
3261                 total_dirty++;
3262
3263                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3264                 unlock_page(page);
3265                 page_cache_release(page);
3266         }
3267
3268 out_unlock:
3269         kfree(ra);
3270         mutex_unlock(&inode->i_mutex);
3271         balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
3272         return ret;
3273 }
3274
/*
 * map file offset (extent_key->objectid - offset) in the relocation inode
 * to the extent at extent_key->objectid via a pinned extent map, then read
 * and redirty the covered pages so they get written to the new location.
 */
static int noinline relocate_data_extent(struct inode *reloc_inode,
					 struct btrfs_key *extent_key,
					 u64 offset)
{
	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
	struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
	struct extent_map *em;

	em = alloc_extent_map(GFP_NOFS);
	BUG_ON(!em || IS_ERR(em));

	em->start = extent_key->objectid - offset;
	em->len = extent_key->offset;
	em->block_start = extent_key->objectid;
	em->bdev = root->fs_info->fs_devices->latest_bdev;
	/* pinned: keep this mapping from being dropped while we relocate */
	set_bit(EXTENT_FLAG_PINNED, &em->flags);

	/* setup extent map to cheat btrfs_readpage */
	mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex);
	while (1) {
		int ret;
		spin_lock(&em_tree->lock);
		ret = add_extent_mapping(em_tree, em);
		spin_unlock(&em_tree->lock);
		if (ret != -EEXIST) {
			free_extent_map(em);
			break;
		}
		/* an overlapping mapping exists; drop it and retry */
		btrfs_drop_extent_cache(reloc_inode, em->start,
					em->start + em->len - 1, 0);
	}
	mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex);

	return relocate_inode_pages(reloc_inode, extent_key->objectid - offset,
				    extent_key->offset);
}
3311
/*
 * state for walking the chain of extent back references from an extent
 * up toward the tree root that owns it.  filled in by __next_ref_path.
 */
struct btrfs_ref_path {
	u64 extent_start;		/* bytenr of the extent being walked */
	u64 nodes[BTRFS_MAX_LEVEL];	/* bytenr of the referencing block at each level */
	u64 root_objectid;		/* objectid of the owning tree root */
	u64 root_generation;		/* generation from the root's backref */
	u64 owner_objectid;		/* ref objectid at the lowest level */
	u64 owner_offset;		/* ref offset at the lowest level */
	u32 num_refs;			/* ref count at the lowest level */
	int lowest_level;		/* level where the walk starts */
	int current_level;		/* level the walk has climbed to */
};
3323
/* on-disk location of one file extent, collected by get_new_locations */
struct disk_extent {
	u64 disk_bytenr;	/* start of the extent on disk */
	u64 disk_num_bytes;	/* full size of the on-disk extent */
	u64 offset;		/* offset of the file data into the extent */
	u64 num_bytes;		/* bytes of the extent used by the file */
};
3330
3331 static int is_cowonly_root(u64 root_objectid)
3332 {
3333         if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
3334             root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
3335             root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
3336             root_objectid == BTRFS_DEV_TREE_OBJECTID ||
3337             root_objectid == BTRFS_TREE_LOG_OBJECTID)
3338                 return 1;
3339         return 0;
3340 }
3341
/*
 * advance 'ref_path' to the next chain of back references leading from
 * the extent at ref_path->extent_start up to a tree root.
 *
 * when 'first_time' is set the walk is initialized and climbs straight
 * up from the extent; otherwise it backtracks (walk_down) to the next
 * unvisited reference and climbs up from there (walk_up).
 *
 * returns 0 when a root was reached (root_objectid/root_generation are
 * filled in), 1 when there are no more reference chains, or a negative
 * errno.  alloc_mutex is taken internally and dropped around rescheds.
 */
static int noinline __next_ref_path(struct btrfs_trans_handle *trans,
				    struct btrfs_root *extent_root,
				    struct btrfs_ref_path *ref_path,
				    int first_time)
{
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_extent_ref *ref;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 bytenr;
	u32 nritems;
	int level;
	int ret = 1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	mutex_lock(&extent_root->fs_info->alloc_mutex);

	if (first_time) {
		/* -1 denotes the extent itself, below any tree level */
		ref_path->lowest_level = -1;
		ref_path->current_level = -1;
		goto walk_up;
	}
walk_down:
	/*
	 * backtrack: descend one level at a time looking for a reference
	 * with a higher parent bytenr than the one we already visited
	 */
	level = ref_path->current_level - 1;
	while (level >= -1) {
		u64 parent;
		if (level < ref_path->lowest_level)
			break;

		if (level >= 0) {
			bytenr = ref_path->nodes[level];
		} else {
			bytenr = ref_path->extent_start;
		}
		BUG_ON(bytenr == 0);

		parent = ref_path->nodes[level + 1];
		ref_path->nodes[level + 1] = 0;
		ref_path->current_level = level;
		BUG_ON(parent == 0);

		/* search for the next backref after the visited parent */
		key.objectid = bytenr;
		key.offset = parent + 1;
		key.type = BTRFS_EXTENT_REF_KEY;

		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
		BUG_ON(ret == 0);

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				goto next;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid == bytenr &&
				found_key.type == BTRFS_EXTENT_REF_KEY)
			goto found;
next:
		level--;
		btrfs_release_path(extent_root, path);
		/* drop alloc_mutex so others can make progress */
		if (need_resched()) {
			mutex_unlock(&extent_root->fs_info->alloc_mutex);
			cond_resched();
			mutex_lock(&extent_root->fs_info->alloc_mutex);
		}
	}
	/* reached lowest level */
	ret = 1;
	goto out;
walk_up:
	/*
	 * climb: find the first backref of the block at the current level
	 * and step up to its parent until a tree root is reached
	 */
	level = ref_path->current_level;
	while (level < BTRFS_MAX_LEVEL - 1) {
		u64 ref_objectid;
		if (level >= 0) {
			bytenr = ref_path->nodes[level];
		} else {
			bytenr = ref_path->extent_start;
		}
		BUG_ON(bytenr == 0);

		key.objectid = bytenr;
		key.offset = 0;
		key.type = BTRFS_EXTENT_REF_KEY;

		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				/* the extent was freed by someone */
				if (ref_path->lowest_level == level)
					goto out;
				btrfs_release_path(extent_root, path);
				goto walk_down;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != bytenr ||
				found_key.type != BTRFS_EXTENT_REF_KEY) {
			/* the extent was freed by someone */
			if (ref_path->lowest_level == level) {
				ret = 1;
				goto out;
			}
			btrfs_release_path(extent_root, path);
			goto walk_down;
		}
found:
		ref = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_extent_ref);
		ref_objectid = btrfs_ref_objectid(leaf, ref);
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
			/* tree block ref: the objectid encodes the level */
			if (first_time) {
				level = (int)ref_objectid;
				BUG_ON(level >= BTRFS_MAX_LEVEL);
				ref_path->lowest_level = level;
				ref_path->current_level = level;
				ref_path->nodes[level] = bytenr;
			} else {
				WARN_ON(ref_objectid != level);
			}
		} else {
			WARN_ON(level != -1);
		}
		first_time = 0;

		if (ref_path->lowest_level == level) {
			ref_path->owner_objectid = ref_objectid;
			ref_path->owner_offset = btrfs_ref_offset(leaf, ref);
			ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
		}

		/*
		 * the block is tree root or the block isn't in reference
		 * counted tree.
		 */
		if (found_key.objectid == found_key.offset ||
		    is_cowonly_root(btrfs_ref_root(leaf, ref))) {
			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
			ref_path->root_generation =
				btrfs_ref_generation(leaf, ref);
			if (level < 0) {
				/* special reference from the tree log */
				ref_path->nodes[0] = found_key.offset;
				ref_path->current_level = 0;
			}
			ret = 0;
			goto out;
		}

		level++;
		BUG_ON(ref_path->nodes[level] != 0);
		ref_path->nodes[level] = found_key.offset;
		ref_path->current_level = level;

		/*
		 * the reference was created in the running transaction,
		 * no need to continue walking up.
		 */
		if (btrfs_ref_generation(leaf, ref) == trans->transid) {
			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
			ref_path->root_generation =
				btrfs_ref_generation(leaf, ref);
			ret = 0;
			goto out;
		}

		btrfs_release_path(extent_root, path);
		/* drop alloc_mutex so others can make progress */
		if (need_resched()) {
			mutex_unlock(&extent_root->fs_info->alloc_mutex);
			cond_resched();
			mutex_lock(&extent_root->fs_info->alloc_mutex);
		}
	}
	/* reached max tree level, but no tree root found. */
	BUG();
out:
	mutex_unlock(&extent_root->fs_info->alloc_mutex);
	btrfs_free_path(path);
	return ret;
}
3543
3544 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
3545                                 struct btrfs_root *extent_root,
3546                                 struct btrfs_ref_path *ref_path,
3547                                 u64 extent_start)
3548 {
3549         memset(ref_path, 0, sizeof(*ref_path));
3550         ref_path->extent_start = extent_start;
3551
3552         return __next_ref_path(trans, extent_root, ref_path, 1);
3553 }
3554
/*
 * advance an already-initialized backref walk (see btrfs_first_ref_path)
 * to the next chain of references.
 */
static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root,
			       struct btrfs_ref_path *ref_path)
{
	return __next_ref_path(trans, extent_root, ref_path, 0);
}
3561
3562 static int noinline get_new_locations(struct inode *reloc_inode,
3563                                       struct btrfs_key *extent_key,
3564                                       u64 offset, int no_fragment,
3565                                       struct disk_extent **extents,
3566                                       int *nr_extents)
3567 {
3568         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
3569         struct btrfs_path *path;
3570         struct btrfs_file_extent_item *fi;
3571         struct extent_buffer *leaf;
3572         struct disk_extent *exts = *extents;
3573         struct btrfs_key found_key;
3574         u64 cur_pos;
3575         u64 last_byte;
3576         u32 nritems;
3577         int nr = 0;
3578         int max = *nr_extents;
3579         int ret;
3580
3581         WARN_ON(!no_fragment && *extents);
3582         if (!exts) {
3583                 max = 1;
3584                 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
3585                 if (!exts)
3586                         return -ENOMEM;
3587         }
3588
3589         path = btrfs_alloc_path();
3590         BUG_ON(!path);
3591
3592         cur_pos = extent_key->objectid - offset;
3593         last_byte = extent_key->objectid + extent_key->offset;
3594         ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
3595                                        cur_pos, 0);
3596         if (ret < 0)
3597                 goto out;
3598         if (ret > 0) {
3599                 ret = -ENOENT;
3600                 goto out;
3601         }
3602
3603         while (1) {
3604                 leaf = path->nodes[0];
3605                 nritems = btrfs_header_nritems(leaf);
3606                 if (path->slots[0] >= nritems) {
3607                         ret = btrfs_next_leaf(root, path);
3608                         if (ret < 0)
3609                                 goto out;
3610                         if (ret > 0)
3611                                 break;
3612                         leaf = path->nodes[0];
3613                 }
3614
3615                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3616                 if (found_key.offset != cur_pos ||
3617                     found_key.type != BTRFS_EXTENT_DATA_KEY ||
3618                     found_key.objectid != reloc_inode->i_ino)
3619                         break;
3620
3621                 fi = btrfs_item_ptr(leaf, path->slots[0],
3622                                     struct btrfs_file_extent_item);
3623                 if (btrfs_file_extent_type(leaf, fi) !=
3624                     BTRFS_FILE_EXTENT_REG ||
3625                     btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
3626                         break;
3627
3628                 if (nr == max) {
3629                         struct disk_extent *old = exts;
3630                         max *= 2;
3631                         exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
3632                         memcpy(exts, old, sizeof(*exts) * nr);
3633                         if (old != *extents)
3634                                 kfree(old);
3635                 }
3636
3637                 exts[nr].disk_bytenr =
3638                         btrfs_file_extent_disk_bytenr(leaf, fi);
3639                 exts[nr].disk_num_bytes =
3640                         btrfs_file_extent_disk_num_bytes(leaf, fi);
3641                 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
3642                 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
3643                 WARN_ON(exts[nr].offset > 0);
3644                 WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
3645
3646                 cur_pos += exts[nr].num_bytes;
3647                 nr++;
3648
3649                 if (cur_pos + offset >= last_byte)
3650                         break;
3651
3652                 if (no_fragment) {
3653                         ret = 1;
3654                         goto out;
3655                 }
3656                 path->slots[0]++;
3657         }
3658
3659         WARN_ON(cur_pos + offset > last_byte);
3660         if (cur_pos + offset < last_byte) {
3661                 ret = -ENOENT;
3662                 goto out;
3663         }
3664         ret = 0;
3665 out:
3666         btrfs_free_path(path);
3667         if (ret) {
3668                 if (exts != *extents)
3669                         kfree(exts);
3670         } else {
3671                 *extents = exts;
3672                 *nr_extents = nr;
3673         }
3674         return ret;
3675 }
3676
3677 static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
3678                                         struct btrfs_root *root,
3679                                         struct btrfs_path *path,
3680                                         struct btrfs_key *extent_key,
3681                                         struct btrfs_key *leaf_key,
3682                                         struct btrfs_ref_path *ref_path,
3683                                         struct disk_extent *new_extents,
3684                                         int nr_extents)
3685 {
3686         struct extent_buffer *leaf;
3687         struct btrfs_file_extent_item *fi;
3688         struct inode *inode = NULL;
3689         struct btrfs_key key;
3690         u64 lock_start = 0;
3691         u64 lock_end = 0;
3692         u64 num_bytes;
3693         u64 ext_offset;
3694         u64 first_pos;
3695         u32 nritems;
3696         int extent_locked = 0;
3697         int ret;
3698
3699         first_pos = ref_path->owner_offset;
3700         if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
3701                 key.objectid = ref_path->owner_objectid;
3702                 key.offset = ref_path->owner_offset;
3703                 key.type = BTRFS_EXTENT_DATA_KEY;
3704         } else {
3705                 memcpy(&key, leaf_key, sizeof(key));
3706         }
3707
3708         while (1) {
3709                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3710                 if (ret < 0)
3711                         goto out;
3712
3713                 leaf = path->nodes[0];
3714                 nritems = btrfs_header_nritems(leaf);
3715 next:
3716                 if (extent_locked && ret > 0) {
3717                         /*
3718                          * the file extent item was modified by someone
3719                          * before the extent got locked.
3720                          */
3721                         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
3722                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3723                                       lock_end, GFP_NOFS);
3724                         extent_locked = 0;
3725                 }
3726
3727                 if (path->slots[0] >= nritems) {
3728                         if (ref_path->owner_objectid ==
3729                             BTRFS_MULTIPLE_OBJECTIDS)
3730                                 break;
3731
3732                         BUG_ON(extent_locked);
3733                         ret = btrfs_next_leaf(root, path);
3734                         if (ret < 0)
3735                                 goto out;
3736                         if (ret > 0)
3737                                 break;
3738                         leaf = path->nodes[0];
3739                         nritems = btrfs_header_nritems(leaf);
3740                 }
3741
3742                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3743
3744                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
3745                         if ((key.objectid > ref_path->owner_objectid) ||
3746                             (key.objectid == ref_path->owner_objectid &&
3747                              key.type > BTRFS_EXTENT_DATA_KEY) ||
3748                             (key.offset >= first_pos + extent_key->offset))
3749                                 break;
3750                 }
3751
3752                 if (inode && key.objectid != inode->i_ino) {
3753                         BUG_ON(extent_locked);
3754                         btrfs_release_path(root, path);
3755                         mutex_unlock(&inode->i_mutex);
3756                         iput(inode);
3757                         inode = NULL;
3758                         continue;
3759                 }
3760
3761                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
3762                         path->slots[0]++;
3763                         ret = 1;
3764                         goto next;
3765                 }
3766                 fi = btrfs_item_ptr(leaf, path->slots[0],
3767                                     struct btrfs_file_extent_item);
3768                 if ((btrfs_file_extent_type(leaf, fi) !=
3769                      BTRFS_FILE_EXTENT_REG) ||
3770                     (btrfs_file_extent_disk_bytenr(leaf, fi) !=
3771                      extent_key->objectid)) {
3772                         path->slots[0]++;
3773                         ret = 1;
3774                         goto next;
3775                 }
3776
3777                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
3778                 ext_offset = btrfs_file_extent_offset(leaf, fi);
3779
3780                 if (first_pos > key.offset - ext_offset)
3781                         first_pos = key.offset - ext_offset;
3782
3783                 if (!extent_locked) {
3784                         lock_start = key.offset;
3785                         lock_end = lock_start + num_bytes - 1;
3786                 } else {
3787                         BUG_ON(lock_start != key.offset);
3788                         BUG_ON(lock_end - lock_start + 1 < num_bytes);
3789                 }
3790
3791                 if (!inode) {
3792                         btrfs_release_path(root, path);
3793
3794                         inode = btrfs_iget_locked(root->fs_info->sb,
3795                                                   key.objectid, root);
3796                         if (inode->i_state & I_NEW) {
3797                                 BTRFS_I(inode)->root = root;
3798                                 BTRFS_I(inode)->location.objectid =
3799                                         key.objectid;
3800                                 BTRFS_I(inode)->location.type =
3801                                         BTRFS_INODE_ITEM_KEY;
3802                                 BTRFS_I(inode)->location.offset = 0;
3803                                 btrfs_read_locked_inode(inode);
3804                                 unlock_new_inode(inode);
3805                         }
3806                         /*
3807                          * some code call btrfs_commit_transaction while
3808                          * holding the i_mutex, so we can't use mutex_lock
3809                          * here.
3810                          */
3811                         if (is_bad_inode(inode) ||
3812                             !mutex_trylock(&inode->i_mutex)) {
3813                                 iput(inode);
3814                                 inode = NULL;
3815                                 key.offset = (u64)-1;
3816                                 goto skip;
3817                         }
3818                 }
3819
3820                 if (!extent_locked) {
3821                         struct btrfs_ordered_extent *ordered;
3822
3823                         btrfs_release_path(root, path);
3824
3825                         lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3826                                     lock_end, GFP_NOFS);
3827                         ordered = btrfs_lookup_first_ordered_extent(inode,
3828                                                                     lock_end);
3829                         if (ordered &&
3830                             ordered->file_offset <= lock_end &&
3831                             ordered->file_offset + ordered->len > lock_start) {
3832                                 unlock_extent(&BTRFS_I(inode)->io_tree,
3833                                               lock_start, lock_end, GFP_NOFS);
3834                                 btrfs_start_ordered_extent(inode, ordered, 1);
3835                                 btrfs_put_ordered_extent(ordered);
3836                                 key.offset += num_bytes;
3837                                 goto skip;
3838                         }
3839                         if (ordered)
3840                                 btrfs_put_ordered_extent(ordered);
3841
3842                         mutex_lock(&BTRFS_I(inode)->extent_mutex);
3843                         extent_locked = 1;
3844                         continue;
3845                 }
3846
3847                 if (nr_extents == 1) {
3848                         /* update extent pointer in place */
3849                         btrfs_set_file_extent_generation(leaf, fi,
3850                                                 trans->transid);
3851                         btrfs_set_file_extent_disk_bytenr(leaf, fi,
3852                                                 new_extents[0].disk_bytenr);
3853                         btrfs_set_file_extent_disk_num_bytes(leaf, fi,
3854                                                 new_extents[0].disk_num_bytes);
3855                         ext_offset += new_extents[0].offset;
3856                         btrfs_set_file_extent_offset(leaf, fi, ext_offset);
3857                         btrfs_mark_buffer_dirty(leaf);
3858
3859                         btrfs_drop_extent_cache(inode, key.offset,
3860                                                 key.offset + num_bytes - 1, 0);
3861
3862                         ret = btrfs_inc_extent_ref(trans, root,
3863                                                 new_extents[0].disk_bytenr,
3864                                                 new_extents[0].disk_num_bytes,
3865                                                 leaf->start,
3866                                                 root->root_key.objectid,
3867                                                 trans->transid,
3868                                                 key.objectid, key.offset);
3869                         BUG_ON(ret);
3870
3871                         ret = btrfs_free_extent(trans, root,
3872                                                 extent_key->objectid,
3873                                                 extent_key->offset,
3874                                                 leaf->start,
3875                                                 btrfs_header_owner(leaf),
3876                                                 btrfs_header_generation(leaf),
3877                                                 key.objectid, key.offset, 0);
3878                         BUG_ON(ret);
3879
3880                         btrfs_release_path(root, path);
3881                         key.offset += num_bytes;
3882                 } else {
3883                         u64 alloc_hint;
3884                         u64 extent_len;
3885                         int i;
3886                         /*
3887                          * drop old extent pointer at first, then insert the
3888                          * new pointers one bye one
3889                          */
3890                         btrfs_release_path(root, path);
3891                         ret = btrfs_drop_extents(trans, root, inode, key.offset,
3892                                                  key.offset + num_bytes,
3893                                                  key.offset, &alloc_hint);
3894                         BUG_ON(ret);
3895
3896                         for (i = 0; i < nr_extents; i++) {
3897                                 if (ext_offset >= new_extents[i].num_bytes) {
3898                                         ext_offset -= new_extents[i].num_bytes;
3899                                         continue;
3900                                 }
3901                                 extent_len = min(new_extents[i].num_bytes -
3902                                                  ext_offset, num_bytes);
3903
3904                                 ret = btrfs_insert_empty_item(trans, root,
3905                                                               path, &key,
3906                                                               sizeof(*fi));
3907                                 BUG_ON(ret);
3908
3909                                 leaf = path->nodes[0];
3910                                 fi = btrfs_item_ptr(leaf, path->slots[0],
3911                                                 struct btrfs_file_extent_item);
3912                                 btrfs_set_file_extent_generation(leaf, fi,
3913                                                         trans->transid);
3914                                 btrfs_set_file_extent_type(leaf, fi,
3915                                                         BTRFS_FILE_EXTENT_REG);
3916                                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
3917                                                 new_extents[i].disk_bytenr);
3918                                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
3919                                                 new_extents[i].disk_num_bytes);
3920                                 btrfs_set_file_extent_num_bytes(leaf, fi,
3921                                                         extent_len);
3922                                 ext_offset += new_extents[i].offset;
3923                                 btrfs_set_file_extent_offset(leaf, fi,
3924                                                         ext_offset);
3925                                 btrfs_mark_buffer_dirty(leaf);
3926
3927                                 btrfs_drop_extent_cache(inode, key.offset,
3928                                                 key.offset + extent_len - 1, 0);
3929
3930                                 ret = btrfs_inc_extent_ref(trans, root,
3931                                                 new_extents[i].disk_bytenr,
3932                                                 new_extents[i].disk_num_bytes,
3933                                                 leaf->start,
3934                                                 root->root_key.objectid,
3935                                                 trans->transid,
3936                                                 key.objectid, key.offset);
3937                                 BUG_ON(ret);
3938                                 btrfs_release_path(root, path);
3939
3940                                 inode->i_blocks += extent_len >> 9;
3941
3942                                 ext_offset = 0;
3943                                 num_bytes -= extent_len;
3944                                 key.offset += extent_len;
3945
3946                                 if (num_bytes == 0)
3947                                         break;
3948                         }
3949                         BUG_ON(i >= nr_extents);
3950                 }
3951
3952                 if (extent_locked) {
3953                         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
3954                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3955                                       lock_end, GFP_NOFS);
3956                         extent_locked = 0;
3957                 }
3958 skip:
3959                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
3960                     key.offset >= first_pos + extent_key->offset)
3961                         break;
3962
3963                 cond_resched();
3964         }
3965         ret = 0;
3966 out:
3967         btrfs_release_path(root, path);
3968         if (inode) {
3969                 mutex_unlock(&inode->i_mutex);
3970                 if (extent_locked) {
3971                         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
3972                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3973                                       lock_end, GFP_NOFS);
3974                 }
3975                 iput(inode);
3976         }
3977         return ret;
3978 }
3979
/*
 * Record that the extent starting at @orig_bytenr (length @num_bytes)
 * has been relocated to @new_bytenr.
 *
 * The mapping is kept in fs_info->reloc_mapping_tree: the original byte
 * range is tagged EXTENT_LOCKED and the state's private field stores the
 * new start address.  Looked up later via btrfs_get_reloc_mapping().
 *
 * Always returns 0.
 */
int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
			    u64 num_bytes, u64 new_bytenr)
{
	set_extent_bits(&root->fs_info->reloc_mapping_tree,
			orig_bytenr, orig_bytenr + num_bytes - 1,
			EXTENT_LOCKED, GFP_NOFS);
	set_state_private(&root->fs_info->reloc_mapping_tree,
			  orig_bytenr, new_bytenr);
	return 0;
}
3990
3991 int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
3992                             u64 num_bytes, u64 *new_bytenr)
3993 {
3994         u64 bytenr;
3995         u64 cur_bytenr = orig_bytenr;
3996         u64 prev_bytenr = orig_bytenr;
3997         int ret;
3998
3999         while (1) {
4000                 ret = get_state_private(&root->fs_info->reloc_mapping_tree,
4001                                         cur_bytenr, &bytenr);
4002                 if (ret)
4003                         break;
4004                 prev_bytenr = cur_bytenr;
4005                 cur_bytenr = bytenr;
4006         }
4007
4008         if (orig_bytenr == cur_bytenr)
4009                 return -ENOENT;
4010
4011         if (prev_bytenr != orig_bytenr) {
4012                 set_state_private(&root->fs_info->reloc_mapping_tree,
4013                                   orig_bytenr, cur_bytenr);
4014         }
4015         *new_bytenr = cur_bytenr;
4016         return 0;
4017 }
4018
/*
 * Drop every recorded relocation mapping: clear all bits (mask -1) over
 * the entire range of the reloc_mapping_tree, which also frees the
 * per-range state holding the new addresses.
 */
void btrfs_free_reloc_mappings(struct btrfs_root *root)
{
	clear_extent_bits(&root->fs_info->reloc_mapping_tree,
			  0, (u64)-1, -1, GFP_NOFS);
}
4024
/*
 * After a reloc-tree block has been COWed from the block at @orig_start,
 * clone the leaf-ref cache entry of the original leaf for the new buffer
 * @buf, so cached backref information stays available for the new block.
 *
 * Only leaves (level 0) have ref cache entries; non-leaf blocks are a
 * no-op.  Returns -ENOENT if the original leaf has no cached ref,
 * -ENOMEM on allocation failure, 0 otherwise.
 */
int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_buffer *buf, u64 orig_start)
{
	int level;
	int ret;

	/* only freshly COWed blocks of the reloc tree are expected here */
	BUG_ON(btrfs_header_generation(buf) != trans->transid);
	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);

	level = btrfs_header_level(buf);
	if (level == 0) {
		struct btrfs_leaf_ref *ref;
		struct btrfs_leaf_ref *orig_ref;

		orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
		if (!orig_ref)
			return -ENOENT;

		ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
		if (!ref) {
			btrfs_free_leaf_ref(root, orig_ref);
			return -ENOMEM;
		}

		/* copy the cached extent backrefs of the original leaf */
		ref->nritems = orig_ref->nritems;
		memcpy(ref->extents, orig_ref->extents,
			sizeof(ref->extents[0]) * ref->nritems);

		/* done with the lookup reference on the original entry */
		btrfs_free_leaf_ref(root, orig_ref);

		ref->root_gen = trans->transid;
		ref->bytenr = buf->start;
		ref->owner = btrfs_header_owner(buf);
		ref->generation = btrfs_header_generation(buf);
		ret = btrfs_add_leaf_ref(root, ref, 0);
		WARN_ON(ret);
		/* drop our reference; the cache keeps its own */
		btrfs_free_leaf_ref(root, ref);
	}
	return 0;
}
4066
/*
 * Drop cached extent mappings in @target_root's inodes for every regular
 * file extent item found in @leaf.
 *
 * For each EXTENT_DATA item the corresponding inode is looked up in
 * @target_root and its extent cache is dropped over the file range the
 * item covers.  Inline extents and holes (disk_bytenr == 0) are skipped.
 * An objectid whose inode cannot be found is remembered in skip_objectid
 * so further items of the same file are skipped without a lookup.
 *
 * NOTE(review): @group is not referenced in this function — presumably
 * kept for signature symmetry with related helpers; confirm.
 *
 * Always returns 0.
 */
static int noinline invalidate_extent_cache(struct btrfs_root *root,
					struct extent_buffer *leaf,
					struct btrfs_block_group_cache *group,
					struct btrfs_root *target_root)
{
	struct btrfs_key key;
	struct inode *inode = NULL;
	struct btrfs_file_extent_item *fi;
	u64 num_bytes;
	u64 skip_objectid = 0;
	u32 nritems;
	u32 i;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.objectid == skip_objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		/* disk_bytenr == 0 marks a hole; nothing cached to drop */
		if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
			continue;
		/* reuse the inode across consecutive items of one file */
		if (!inode || inode->i_ino != key.objectid) {
			iput(inode);
			inode = btrfs_ilookup(target_root->fs_info->sb,
					      key.objectid, target_root, 1);
		}
		if (!inode) {
			skip_objectid = key.objectid;
			continue;
		}
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);

		/* lock order: io_tree range first, then extent_mutex */
		lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
			    key.offset + num_bytes - 1, GFP_NOFS);
		mutex_lock(&BTRFS_I(inode)->extent_mutex);
		btrfs_drop_extent_cache(inode, key.offset,
					key.offset + num_bytes - 1, 1);
		mutex_unlock(&BTRFS_I(inode)->extent_mutex);
		unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
			      key.offset + num_bytes - 1, GFP_NOFS);
		cond_resched();
	}
	/* iput(NULL) is a no-op, so the last held inode is always released */
	iput(inode);
	return 0;
}
4116
/*
 * Rewrite every file extent item in @leaf that points into block group
 * @group so it refers to the extent's relocated copy.
 *
 * The new location of each extent is obtained from @reloc_inode via
 * get_new_locations().  For each rewritten item a reference is taken on
 * the new extent and the reference on the old one is dropped.  The
 * leaf's ref-cache entry is updated in lockstep so the cached backrefs
 * stay consistent with the on-disk items.
 *
 * NOTE(review): new_extent is a single-element buffer and nr_extent is
 * passed as 1, so extents relocated into multiple pieces are presumably
 * not expected here — confirm against get_new_locations().
 *
 * Always returns 0 (hard failures trip BUG_ON).
 */
static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct extent_buffer *leaf,
					struct btrfs_block_group_cache *group,
					struct inode *reloc_inode)
{
	struct btrfs_key key;
	struct btrfs_key extent_key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_leaf_ref *ref;
	struct disk_extent *new_extent;
	u64 bytenr;
	u64 num_bytes;
	u32 nritems;
	u32 i;
	int ext_index;
	int nr_extent;
	int ret;

	new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
	BUG_ON(!new_extent);

	ref = btrfs_lookup_leaf_ref(root, leaf->start);
	BUG_ON(!ref);

	/* ext_index tracks our position in the leaf's cached ref array */
	ext_index = -1;
	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		/* holes have no backing extent */
		if (bytenr == 0)
			continue;

		/* count every non-hole extent, even ones we skip below */
		ext_index++;
		if (bytenr >= group->key.objectid + group->key.offset ||
		    bytenr + num_bytes <= group->key.objectid)
			continue;

		extent_key.objectid = bytenr;
		extent_key.offset = num_bytes;
		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
		nr_extent = 1;
		ret = get_new_locations(reloc_inode, &extent_key,
					group->key.objectid, 1,
					&new_extent, &nr_extent);
		/* ret > 0: no new location recorded for this extent */
		if (ret > 0)
			continue;
		BUG_ON(ret < 0);

		/* keep the cached ref entry in sync with the rewrite */
		BUG_ON(ref->extents[ext_index].bytenr != bytenr);
		BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
		ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;

		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extent->disk_bytenr);
		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extent->disk_num_bytes);
		new_extent->offset += btrfs_file_extent_offset(leaf, fi);
		btrfs_set_file_extent_offset(leaf, fi, new_extent->offset);
		btrfs_mark_buffer_dirty(leaf);

		/* take a ref on the new extent before dropping the old one */
		ret = btrfs_inc_extent_ref(trans, root,
					new_extent->disk_bytenr,
					new_extent->disk_num_bytes,
					leaf->start,
					root->root_key.objectid,
					trans->transid,
					key.objectid, key.offset);
		BUG_ON(ret);
		ret = btrfs_free_extent(trans, root,
					bytenr, num_bytes, leaf->start,
					btrfs_header_owner(leaf),
					btrfs_header_generation(leaf),
					key.objectid, key.offset, 0);
		BUG_ON(ret);
		cond_resched();
	}
	kfree(new_extent);
	/* every cached ref entry must have been visited exactly once */
	BUG_ON(ext_index + 1 != ref->nritems);
	btrfs_free_leaf_ref(root, ref);
	return 0;
}
4208
4209 int btrfs_free_reloc_root(struct btrfs_root *root)
4210 {
4211         struct btrfs_root *reloc_root;
4212
4213         if (root->reloc_root) {
4214                 reloc_root = root->reloc_root;
4215                 root->reloc_root = NULL;
4216                 list_add(&reloc_root->dead_list,
4217                          &root->fs_info->dead_reloc_roots);
4218         }
4219         return 0;
4220 }
4221
/*
 * Drop every reloc root queued on fs_info->dead_reloc_roots.
 *
 * For each root, btrfs_drop_snapshot() is retried in a loop: -EAGAIN
 * means partial progress, so the transaction is ended (flushing dirty
 * metadata) and a new one joined.  Once the drop completes, the root
 * item is deleted from the tree of tree roots.
 *
 * Lock flow: drop_mutex is taken before each drop attempt; on -EAGAIN it
 * is released before ending the transaction, on completion it is held
 * across the root-item deletion and released afterwards.
 *
 * Freeing of each reloc_root struct is deferred by one iteration via
 * prev_root.  NOTE(review): only the final root has its leaf refs
 * removed with btrfs_remove_leaf_refs() before kfree — presumably the
 * earlier ones are cleared elsewhere; confirm.
 *
 * Always returns 0.
 */
int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *reloc_root;
	struct btrfs_root *prev_root = NULL;
	struct list_head dead_roots;
	int ret;
	unsigned long nr;

	/* take the whole list at once; new entries can accumulate later */
	INIT_LIST_HEAD(&dead_roots);
	list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);

	while (!list_empty(&dead_roots)) {
		reloc_root = list_entry(dead_roots.prev,
					struct btrfs_root, dead_list);
		list_del_init(&reloc_root->dead_list);

		BUG_ON(reloc_root->commit_root != NULL);
		while (1) {
			trans = btrfs_join_transaction(root, 1);
			BUG_ON(!trans);

			mutex_lock(&root->fs_info->drop_mutex);
			ret = btrfs_drop_snapshot(trans, reloc_root);
			/* done (or hard error): leave with drop_mutex held */
			if (ret != -EAGAIN)
				break;
			mutex_unlock(&root->fs_info->drop_mutex);

			/* partial progress: end the transaction and retry */
			nr = trans->blocks_used;
			ret = btrfs_end_transaction(trans, root);
			BUG_ON(ret);
			btrfs_btree_balance_dirty(root, nr);
		}

		free_extent_buffer(reloc_root->node);

		/* remove the reloc root's item from the tree of tree roots */
		ret = btrfs_del_root(trans, root->fs_info->tree_root,
				     &reloc_root->root_key);
		BUG_ON(ret);
		mutex_unlock(&root->fs_info->drop_mutex);

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, root);
		BUG_ON(ret);
		btrfs_btree_balance_dirty(root, nr);

		/* free the previous root one iteration late */
		kfree(prev_root);
		prev_root = reloc_root;
	}
	if (prev_root) {
		btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
		kfree(prev_root);
	}
	return 0;
}
4277
/*
 * Queue @root on the per-fs dead reloc root list so it is torn down by a
 * later btrfs_drop_dead_reloc_roots() call.  Always returns 0.
 */
int btrfs_add_dead_reloc_root(struct btrfs_root *root)
{
	list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
	return 0;
}
4283
/*
 * Mount-time cleanup of relocation state left over from a crash.
 *
 * Any surviving reloc roots are found via btrfs_find_dead_roots() (which
 * queues them on dead_reloc_roots); if there were any, a transaction is
 * committed so they get dropped.  Finally, orphan cleanup is run on the
 * data relocation tree to remove stale relocation inodes.
 *
 * Always returns 0 (failures trip BUG_ON).
 */
int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_key location;
	int found;
	int ret;

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
	BUG_ON(ret);
	found = !list_empty(&root->fs_info->dead_reloc_roots);
	mutex_unlock(&root->fs_info->tree_reloc_mutex);

	if (found) {
		/* committing drops the queued dead reloc roots */
		trans = btrfs_start_transaction(root, 1);
		BUG_ON(!trans);
		ret = btrfs_commit_transaction(trans, root);
		BUG_ON(ret);
	}

	location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	location.offset = (u64)-1;
	location.type = BTRFS_ROOT_ITEM_KEY;

	reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
	BUG_ON(!reloc_root);
	btrfs_orphan_cleanup(reloc_root);
	return 0;
}
4314
4315 static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
4316                                     struct btrfs_root *root)
4317 {
4318         struct btrfs_root *reloc_root;
4319         struct extent_buffer *eb;
4320         struct btrfs_root_item *root_item;
4321         struct btrfs_key root_key;
4322         int ret;
4323
4324         BUG_ON(!root->ref_cows);
4325         if (root->reloc_root)
4326                 return 0;
4327
4328         root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
4329         BUG_ON(!root_item);
4330
4331         ret = btrfs_copy_root(trans, root, root->commit_root,
4332                               &eb, BTRFS_TREE_RELOC_OBJECTID);
4333         BUG_ON(ret);
4334
4335         root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4336         root_key.offset = root->root_key.objectid;
4337         root_key.type = BTRFS_ROOT_ITEM_KEY;
4338
4339         memcpy(root_item, &root->root_item, sizeof(root_item));
4340         btrfs_set_root_refs(root_item, 0);
4341         btrfs_set_root_bytenr(root_item, eb->start);
4342         btrfs_set_root_level(root_item, btrfs_header_level(eb));
4343         memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress));
4344         root_item->drop_level = 0;
4345
4346         btrfs_tree_unlock(eb);
4347         free_extent_buffer(eb);
4348
4349         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
4350                                 &root_key, root_item);
4351         BUG_ON(ret);
4352         kfree(root_item);
4353
4354         reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
4355                                                  &root_key);
4356         BUG_ON(!reloc_root);
4357         reloc_root->last_trans = trans->transid;
4358         reloc_root->commit_root = NULL;
4359         reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
4360
4361         root->reloc_root = reloc_root;
4362         return 0;
4363 }
4364
/*
 * Core function of space balance.
 *
 * The idea is to use reloc trees to relocate tree blocks in reference
 * counted roots. There is one reloc tree for each subvol, and all reloc
 * trees share the same key objectid. Reloc trees are snapshots of the
 * latest committed roots (subvol root->commit_root). To relocate a tree
 * block referenced by a subvol, the code COWs the block through the reloc
 * tree, then updates the pointer in the subvol to point to the new block.
 * Since all reloc trees share the same key objectid, we can easily do
 * special handling to share tree blocks between reloc trees. Once a tree
 * block has been COWed in one reloc tree, we can reuse the result when the
 * same block is COWed again through other reloc trees.
 */
4379 static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
4380                                       struct btrfs_root *root,
4381                                       struct btrfs_path *path,
4382                                       struct btrfs_key *first_key,
4383                                       struct btrfs_ref_path *ref_path,
4384                                       struct btrfs_block_group_cache *group,
4385                                       struct inode *reloc_inode)
4386 {
4387         struct btrfs_root *reloc_root;
4388         struct extent_buffer *eb = NULL;
4389         struct btrfs_key *keys;
4390         u64 *nodes;
4391         int level;
4392         int lowest_merge;
4393         int lowest_level = 0;
4394         int update_refs;
4395         int ret;
4396
4397         if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
4398                 lowest_level = ref_path->owner_objectid;
4399
4400         if (is_cowonly_root(ref_path->root_objectid)) {
4401                 path->lowest_level = lowest_level;
4402                 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
4403                 BUG_ON(ret < 0);
4404                 path->lowest_level = 0;
4405                 btrfs_release_path(root, path);
4406                 return 0;
4407         }
4408
4409         keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS);
4410         BUG_ON(!keys);
4411         nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS);
4412         BUG_ON(!nodes);
4413
4414         mutex_lock(&root->fs_info->tree_reloc_mutex);
4415         ret = init_reloc_tree(trans, root);
4416         BUG_ON(ret);
4417         reloc_root = root->reloc_root;
4418
4419         path->lowest_level = lowest_level;
4420         ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0);
4421         BUG_ON(ret);
4422         /*
4423          * get relocation mapping for tree blocks in the path
4424          */
4425         lowest_merge = BTRFS_MAX_LEVEL;
4426         for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) {
4427                 u64 new_bytenr;
4428                 eb = path->nodes[level];
4429                 if (!eb || eb == reloc_root->node)
4430                         continue;
4431                 ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len,
4432                                               &new_bytenr);
4433                 if (ret)
4434                         continue;
4435                 if (level == 0)
4436                         btrfs_item_key_to_cpu(eb, &keys[level], 0);
4437                 else
4438                         btrfs_node_key_to_cpu(eb, &keys[level], 0);
4439                 nodes[level] = new_bytenr;
4440                 lowest_merge = level;
4441         }
4442
4443         update_refs = 0;
4444         if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4445                 eb = path->nodes[0];
4446                 if (btrfs_header_generation(eb) < trans->transid)
4447                         update_refs = 1;
4448         }
4449
4450         btrfs_release_path(reloc_root, path);
4451         /*
4452          * merge tree blocks that already relocated in other reloc trees
4453          */
4454         if (lowest_merge != BTRFS_MAX_LEVEL) {
4455                 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
4456                                        lowest_merge);
4457                 BUG_ON(ret < 0);
4458         }
4459         /*
4460          * cow any tree blocks that still haven't been relocated
4461          */
4462         ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1);
4463         BUG_ON(ret);
4464         /*
4465          * if we are relocating data block group, update extent pointers
4466          * in the newly created tree leaf.
4467          */
4468         eb = path->nodes[0];
4469         if (update_refs && nodes[0] != eb->start) {
4470                 ret = replace_extents_in_leaf(trans, reloc_root, eb, group,
4471                                               reloc_inode);
4472                 BUG_ON(ret);
4473         }
4474
4475         memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL);
4476         memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL);
4477         for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) {
4478                 eb = path->nodes[level];
4479                 if (!eb || eb == reloc_root->node)
4480                         continue;
4481                 BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID);
4482                 nodes[level] = eb->start;
4483                 if (level == 0)
4484                         btrfs_item_key_to_cpu(eb, &keys[level], 0);
4485                 else
4486                         btrfs_node_key_to_cpu(eb, &keys[level], 0);
4487         }
4488
4489         if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4490                 eb = path->nodes[0];
4491                 extent_buffer_get(eb);
4492         }
4493         btrfs_release_path(reloc_root, path);
4494         /*
4495          * replace tree blocks in the fs tree with tree blocks in
4496          * the reloc tree.
4497          */
4498         ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
4499         BUG_ON(ret < 0);
4500
4501         if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4502                 ret = invalidate_extent_cache(reloc_root, eb, group, root);
4503                 BUG_ON(ret);
4504                 free_extent_buffer(eb);
4505         }
4506         mutex_unlock(&root->fs_info->tree_reloc_mutex);
4507
4508         path->lowest_level = 0;
4509         kfree(nodes);
4510         kfree(keys);
4511         return 0;
4512 }
4513
4514 static int noinline relocate_tree_block(struct btrfs_trans_handle *trans,
4515                                         struct btrfs_root *root,
4516                                         struct btrfs_path *path,
4517                                         struct btrfs_key *first_key,
4518                                         struct btrfs_ref_path *ref_path)
4519 {
4520         int ret;
4521         int needs_lock = 0;
4522
4523         if (root == root->fs_info->extent_root ||
4524             root == root->fs_info->chunk_root ||
4525             root == root->fs_info->dev_root) {
4526                 needs_lock = 1;
4527                 mutex_lock(&root->fs_info->alloc_mutex);
4528         }
4529
4530         ret = relocate_one_path(trans, root, path, first_key,
4531                                 ref_path, NULL, NULL);
4532         BUG_ON(ret);
4533
4534         if (root == root->fs_info->extent_root)
4535                 btrfs_extent_post_op(trans, root);
4536         if (needs_lock)
4537                 mutex_unlock(&root->fs_info->alloc_mutex);
4538
4539         return 0;
4540 }
4541
4542 static int noinline del_extent_zero(struct btrfs_trans_handle *trans,
4543                                     struct btrfs_root *extent_root,
4544                                     struct btrfs_path *path,
4545                                     struct btrfs_key *extent_key)
4546 {
4547         int ret;
4548
4549         mutex_lock(&extent_root->fs_info->alloc_mutex);
4550         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
4551         if (ret)
4552                 goto out;
4553         ret = btrfs_del_item(trans, extent_root, path);
4554 out:
4555         btrfs_release_path(extent_root, path);
4556         mutex_unlock(&extent_root->fs_info->alloc_mutex);
4557         return ret;
4558 }
4559
4560 static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info,
4561                                                 struct btrfs_ref_path *ref_path)
4562 {
4563         struct btrfs_key root_key;
4564
4565         root_key.objectid = ref_path->root_objectid;
4566         root_key.type = BTRFS_ROOT_ITEM_KEY;
4567         if (is_cowonly_root(ref_path->root_objectid))
4568                 root_key.offset = 0;
4569         else
4570                 root_key.offset = (u64)-1;
4571
4572         return btrfs_read_fs_root_no_name(fs_info, &root_key);
4573 }
4574
/*
 * Relocate all references to the extent described by @extent_key out of
 * block group @group.
 *
 * Iterates over every reference path to the extent and, depending on the
 * relocation @pass and whether the reference comes from a data inode or
 * a tree block, either copies the data to @reloc_inode, relocates the
 * referencing tree block, or rewrites the extent pointers in place.
 *
 * Called with fs_info->alloc_mutex held; the mutex is dropped for the
 * duration of the transaction and retaken before returning.
 */
static int noinline relocate_one_extent(struct btrfs_root *extent_root,
					struct btrfs_path *path,
					struct btrfs_key *extent_key,
					struct btrfs_block_group_cache *group,
					struct inode *reloc_inode, int pass)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *found_root;
	struct btrfs_ref_path *ref_path = NULL;
	struct disk_extent *new_extents = NULL;
	int nr_extents = 0;
	int loops;
	int ret;
	int level;
	struct btrfs_key first_key;
	u64 prev_block = 0;

	/* drop the allocator lock while the transaction runs */
	mutex_unlock(&extent_root->fs_info->alloc_mutex);

	trans = btrfs_start_transaction(extent_root, 1);
	BUG_ON(!trans);

	if (extent_key->objectid == 0) {
		/* special-case: just remove the zero extent item */
		ret = del_extent_zero(trans, extent_root, path, extent_key);
		goto out;
	}

	ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
	if (!ref_path) {
		ret = -ENOMEM;
		goto out;
	}

	/* walk every reference path rooted at this extent */
	for (loops = 0; ; loops++) {
		if (loops == 0) {
			ret = btrfs_first_ref_path(trans, extent_root, ref_path,
						   extent_key->objectid);
		} else {
			ret = btrfs_next_ref_path(trans, extent_root, ref_path);
		}
		if (ret < 0)
			goto out;
		if (ret > 0)
			break;	/* no more reference paths */

		/* log and reloc trees are themselves transient; skip them */
		if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
		    ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
			continue;

		found_root = read_ref_root(extent_root->fs_info, ref_path);
		BUG_ON(!found_root);
		/*
		 * for reference counted tree, only process reference paths
		 * rooted at the latest committed root.
		 */
		if (found_root->ref_cows &&
		    ref_path->root_generation != found_root->root_key.offset)
			continue;

		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/* reference comes from a file extent (leaf level 0) */
			if (pass == 0) {
				/*
				 * copy data extents to new locations
				 */
				u64 group_start = group->key.objectid;
				ret = relocate_data_extent(reloc_inode,
							   extent_key,
							   group_start);
				if (ret < 0)
					goto out;
				break;
			}
			level = 0;
		} else {
			/* tree block reference: owner encodes the level */
			level = ref_path->owner_objectid;
		}

		/*
		 * read the first key of the referencing block, caching it so
		 * consecutive references to the same block skip the read
		 */
		if (prev_block != ref_path->nodes[level]) {
			struct extent_buffer *eb;
			u64 block_start = ref_path->nodes[level];
			u64 block_size = btrfs_level_size(found_root, level);

			eb = read_tree_block(found_root, block_start,
					     block_size, 0);
			btrfs_tree_lock(eb);
			BUG_ON(level != btrfs_header_level(eb));

			if (level == 0)
				btrfs_item_key_to_cpu(eb, &first_key, 0);
			else
				btrfs_node_key_to_cpu(eb, &first_key, 0);

			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
			prev_block = block_start;
		}

		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		    pass >= 2) {
			/*
			 * use fallback method to process the remaining
			 * references.
			 */
			if (!new_extents) {
				/* lazily compute new extent locations once */
				u64 group_start = group->key.objectid;
				ret = get_new_locations(reloc_inode,
							extent_key,
							group_start, 0,
							&new_extents,
							&nr_extents);
				if (ret < 0)
					goto out;
			}
			btrfs_record_root_in_trans(found_root);
			ret = replace_one_extent(trans, found_root,
						path, extent_key,
						&first_key, ref_path,
						new_extents, nr_extents);
			if (ret < 0)
				goto out;
			continue;
		}

		btrfs_record_root_in_trans(found_root);
		if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
			/* metadata reference: move the tree block itself */
			ret = relocate_tree_block(trans, found_root, path,
						  &first_key, ref_path);
		} else {
			/*
			 * try to update data extent references while
			 * keeping metadata shared between snapshots.
			 */
			ret = relocate_one_path(trans, found_root, path,
						&first_key, ref_path,
						group, reloc_inode);
		}
		if (ret < 0)
			goto out;
	}
	ret = 0;
out:
	btrfs_end_transaction(trans, extent_root);
	kfree(new_extents);
	kfree(ref_path);
	/* restore the lock state the caller expects */
	mutex_lock(&extent_root->fs_info->alloc_mutex);
	return ret;
}
4722
4723 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
4724 {
4725         u64 num_devices;
4726         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
4727                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
4728
4729         num_devices = root->fs_info->fs_devices->num_devices;
4730         if (num_devices == 1) {
4731                 stripped |= BTRFS_BLOCK_GROUP_DUP;
4732                 stripped = flags & ~stripped;
4733
4734                 /* turn raid0 into single device chunks */
4735                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
4736                         return stripped;
4737
4738                 /* turn mirroring into duplication */
4739                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
4740                              BTRFS_BLOCK_GROUP_RAID10))
4741                         return stripped | BTRFS_BLOCK_GROUP_DUP;
4742                 return flags;
4743         } else {
4744                 /* they already had raid on here, just return */
4745                 if (flags & stripped)
4746                         return flags;
4747
4748                 stripped |= BTRFS_BLOCK_GROUP_DUP;
4749                 stripped = flags & ~stripped;
4750
4751                 /* switch duplicated blocks with raid1 */
4752                 if (flags & BTRFS_BLOCK_GROUP_DUP)
4753                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
4754
4755                 /* turn single device chunks into raid0 */
4756                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
4757         }
4758         return flags;
4759 }
4760
/*
 * Pre-allocate a chunk with the post-relocation profile so that data
 * moved out of @shrink_block_group has somewhere to land.
 *
 * Only does work while the group still has bytes in use.  The amount
 * requested is the group's used bytes when the profile will change, or
 * the whole group size otherwise, plus 2MB of slack; @force is passed
 * straight through to do_chunk_alloc().
 *
 * Caller must hold fs_info->alloc_mutex.  The mutex is dropped and
 * retaken around btrfs_start_transaction()/btrfs_end_transaction() —
 * presumably to avoid blocking on transaction start while holding the
 * allocator lock; confirm against the lock ordering rules.
 */
int __alloc_chunk_for_shrink(struct btrfs_root *root,
		     struct btrfs_block_group_cache *shrink_block_group,
		     int force)
{
	struct btrfs_trans_handle *trans;
	u64 new_alloc_flags;
	u64 calc;

	spin_lock(&shrink_block_group->lock);
	if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
		/* drop both locks before starting a transaction */
		spin_unlock(&shrink_block_group->lock);
		mutex_unlock(&root->fs_info->alloc_mutex);

		trans = btrfs_start_transaction(root, 1);
		mutex_lock(&root->fs_info->alloc_mutex);
		spin_lock(&shrink_block_group->lock);

		new_alloc_flags = update_block_group_flags(root,
						   shrink_block_group->flags);
		if (new_alloc_flags != shrink_block_group->flags) {
			/* profile changes: only the used bytes move */
			calc =
			     btrfs_block_group_used(&shrink_block_group->item);
		} else {
			/* same profile: reserve a full group's worth */
			calc = shrink_block_group->key.offset;
		}
		spin_unlock(&shrink_block_group->lock);

		do_chunk_alloc(trans, root->fs_info->extent_root,
			       calc + 2 * 1024 * 1024, new_alloc_flags, force);

		/* again, no alloc_mutex while ending the transaction */
		mutex_unlock(&root->fs_info->alloc_mutex);
		btrfs_end_transaction(trans, root);
		mutex_lock(&root->fs_info->alloc_mutex);
	} else
		spin_unlock(&shrink_block_group->lock);
	return 0;
}
4798
4799 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
4800                                  struct btrfs_root *root,
4801                                  u64 objectid, u64 size)
4802 {
4803         struct btrfs_path *path;
4804         struct btrfs_inode_item *item;
4805         struct extent_buffer *leaf;
4806         int ret;
4807
4808         path = btrfs_alloc_path();
4809         if (!path)
4810                 return -ENOMEM;
4811
4812         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
4813         if (ret)
4814                 goto out;
4815
4816         leaf = path->nodes[0];
4817         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
4818         memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
4819         btrfs_set_inode_generation(leaf, item, 1);
4820         btrfs_set_inode_size(leaf, item, size);
4821         btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
4822         btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM);
4823         btrfs_mark_buffer_dirty(leaf);
4824         btrfs_release_path(root, path);
4825 out:
4826         btrfs_free_path(path);
4827         return ret;
4828 }
4829
/*
 * Create the temporary inode used to hold relocated data for @group.
 *
 * The inode lives in the data relocation tree, covers the whole group
 * via one preallocated file extent, and is immediately added to the
 * orphan list so it is cleaned up if we crash mid-relocation.
 *
 * Returns the locked-in inode or an ERR_PTR on failure.
 */
static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
					struct btrfs_block_group_cache *group)
{
	struct inode *inode = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_key root_key;
	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
	int err = 0;

	/* the inode goes into the dedicated data reloc tree */
	root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(root))
		return ERR_CAST(root);

	trans = btrfs_start_transaction(root, 1);
	BUG_ON(!trans);

	err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
	if (err)
		goto out;

	err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
	BUG_ON(err);

	/* one file extent spanning the whole block group */
	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
				       group->key.offset, 0);
	BUG_ON(err);

	/*
	 * the inode was just created on disk, so iget must hand back a
	 * new in-memory inode (I_NEW); anything else is a logic error
	 */
	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
	if (inode->i_state & I_NEW) {
		BTRFS_I(inode)->root = root;
		BTRFS_I(inode)->location.objectid = objectid;
		BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
		BTRFS_I(inode)->location.offset = 0;
		btrfs_read_locked_inode(inode);
		unlock_new_inode(inode);
		BUG_ON(is_bad_inode(inode));
	} else {
		BUG_ON(1);
	}

	/* ensure cleanup on crash before the inode is deleted normally */
	err = btrfs_orphan_add(trans, inode);
out:
	btrfs_end_transaction(trans, root);
	if (err) {
		if (inode)
			iput(inode);
		inode = ERR_PTR(err);
	}
	return inode;
}
4884
/*
 * Move every extent out of the block group starting at @group_start.
 *
 * Works in passes: pass 0 copies data extents into a temporary reloc
 * inode, later passes update references (with a fallback method from
 * pass 2 on, see relocate_one_extent()).  Passes repeat until a scan of
 * the extent tree finds no more extents inside the group.
 */
int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_buffer *leaf;
	struct inode *reloc_inode;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_key key;
	u64 cur_byte;
	u64 total_found;
	u32 nritems;
	int ret;
	int progress;
	int pass = 0;

	/* all the scanning below happens in the extent tree */
	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(info, group_start);
	BUG_ON(!block_group);

	printk("btrfs relocating block group %llu flags %llu\n",
	       (unsigned long long)block_group->key.objectid,
	       (unsigned long long)block_group->flags);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	reloc_inode = create_reloc_inode(info, block_group);
	BUG_ON(IS_ERR(reloc_inode));

	mutex_lock(&root->fs_info->alloc_mutex);

	/*
	 * mark the group read-only and take it out of the accounted
	 * space so no new allocations land in it while we empty it
	 */
	__alloc_chunk_for_shrink(root, block_group, 1);
	block_group->ro = 1;
	block_group->space_info->total_bytes -= block_group->key.offset;

	mutex_unlock(&root->fs_info->alloc_mutex);

	/* flush dirty data so the extent tree reflects reality */
	btrfs_start_delalloc_inodes(info->tree_root);
	btrfs_wait_ordered_extents(info->tree_root, 0);
again:
	total_found = 0;
	progress = 0;
	key.objectid = block_group->key.objectid;
	key.offset = 0;
	key.type = 0;
	cur_byte = key.objectid;

	/* commit so the previous pass's changes are on disk */
	trans = btrfs_start_transaction(info->tree_root, 1);
	btrfs_commit_transaction(trans, info->tree_root);

	mutex_lock(&root->fs_info->cleaner_mutex);
	btrfs_clean_old_snapshots(info->tree_root);
	btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
	mutex_unlock(&root->fs_info->cleaner_mutex);

	mutex_lock(&root->fs_info->alloc_mutex);

	/* scan the extent tree for extents inside the group's range */
	while(1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
next:
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		/* done once we've walked past the end of the group */
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		/* be nice: drop the lock and reschedule periodically */
		if (progress && need_resched()) {
			btrfs_release_path(root, path);
			mutex_unlock(&root->fs_info->alloc_mutex);
			cond_resched();
			mutex_lock(&root->fs_info->alloc_mutex);
			progress = 0;
			continue;
		}
		progress = 1;

		/* skip non-extent items and extents already processed */
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
		    key.objectid + key.offset <= cur_byte) {
			path->slots[0]++;
			goto next;
		}

		total_found++;
		cur_byte = key.objectid + key.offset;
		btrfs_release_path(root, path);

		__alloc_chunk_for_shrink(root, block_group, 0);
		ret = relocate_one_extent(root, path, &key, block_group,
					  reloc_inode, pass);
		BUG_ON(ret < 0);

		/* restart the search just past the extent we moved */
		key.objectid = cur_byte;
		key.type = 0;
		key.offset = 0;
	}

	btrfs_release_path(root, path);
	mutex_unlock(&root->fs_info->alloc_mutex);

	if (pass == 0) {
		/* flush the copied data and drop its page cache */
		btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
		invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
		WARN_ON(reloc_inode->i_mapping->nrpages);
	}

	if (total_found > 0) {
		printk("btrfs found %llu extents in pass %d\n",
		       (unsigned long long)total_found, pass);
		pass++;
		goto again;
	}

	/* delete reloc_inode */
	iput(reloc_inode);

	/* unpin extents in this range */
	trans = btrfs_start_transaction(info->tree_root, 1);
	btrfs_commit_transaction(trans, info->tree_root);

	mutex_lock(&root->fs_info->alloc_mutex);

	/* the group should now be completely empty */
	spin_lock(&block_group->lock);
	WARN_ON(block_group->pinned > 0);
	WARN_ON(block_group->reserved > 0);
	WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
	spin_unlock(&block_group->lock);
	ret = 0;
out:
	mutex_unlock(&root->fs_info->alloc_mutex);
	btrfs_free_path(path);
	return ret;
}
5035
5036 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
5037                            struct btrfs_key *key)
5038 {
5039         int ret = 0;
5040         struct btrfs_key found_key;
5041         struct extent_buffer *leaf;
5042         int slot;
5043
5044         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
5045         if (ret < 0)
5046                 goto out;
5047
5048         while(1) {
5049                 slot = path->slots[0];
5050                 leaf = path->nodes[0];
5051                 if (slot >= btrfs_header_nritems(leaf)) {
5052                         ret = btrfs_next_leaf(root, path);
5053                         if (ret == 0)
5054                                 continue;
5055                         if (ret < 0)
5056                                 goto out;
5057                         break;
5058                 }
5059                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5060
5061                 if (found_key.objectid >= key->objectid &&
5062                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5063                         ret = 0;
5064                         goto out;
5065                 }
5066                 path->slots[0]++;
5067         }
5068         ret = -ENOENT;
5069 out:
5070         return ret;
5071 }
5072
/*
 * Tear down all in-memory block group caches (unmount path).
 *
 * Repeatedly takes the last entry of the block group rb-tree, drops its
 * free space cache, unlinks it from the cache tree and from its
 * space_info list, and frees it.  Always returns 0.
 */
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	struct btrfs_block_group_cache *block_group;
	struct rb_node *n;

	mutex_lock(&info->alloc_mutex);
	spin_lock(&info->block_group_cache_lock);
	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
		block_group = rb_entry(n, struct btrfs_block_group_cache,
				       cache_node);

		/*
		 * the spinlock is dropped around the free space cache
		 * removal — NOTE(review): presumably because that call
		 * can block or allocate; the node is re-fetched via
		 * rb_last() on the next iteration so this is safe
		 */
		spin_unlock(&info->block_group_cache_lock);
		btrfs_remove_free_space_cache(block_group);
		spin_lock(&info->block_group_cache_lock);

		rb_erase(&block_group->cache_node,
			 &info->block_group_cache_tree);
		spin_lock(&block_group->space_info->lock);
		list_del(&block_group->list);
		spin_unlock(&block_group->space_info->lock);
		kfree(block_group);
	}
	spin_unlock(&info->block_group_cache_lock);
	mutex_unlock(&info->alloc_mutex);
	return 0;
}
5099
5100 int btrfs_read_block_groups(struct btrfs_root *root)
5101 {
5102         struct btrfs_path *path;
5103         int ret;
5104         struct btrfs_block_group_cache *cache;
5105         struct btrfs_fs_info *info = root->fs_info;
5106         struct btrfs_space_info *space_info;
5107         struct btrfs_key key;
5108         struct btrfs_key found_key;
5109         struct extent_buffer *leaf;
5110
5111         root = info->extent_root;
5112         key.objectid = 0;
5113         key.offset = 0;
5114         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5115         path = btrfs_alloc_path();
5116         if (!path)
5117                 return -ENOMEM;
5118
5119         mutex_lock(&root->fs_info->alloc_mutex);
5120         while(1) {
5121                 ret = find_first_block_group(root, path, &key);
5122                 if (ret > 0) {
5123                         ret = 0;
5124                         goto error;
5125                 }
5126                 if (ret != 0)
5127                         goto error;
5128
5129                 leaf = path->nodes[0];
5130                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5131                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
5132                 if (!cache) {
5133                         ret = -ENOMEM;
5134                         break;
5135                 }
5136
5137                 spin_lock_init(&cache->lock);
5138                 INIT_LIST_HEAD(&cache->list);
5139                 read_extent_buffer(leaf, &cache->item,
5140                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
5141                                    sizeof(cache->item));
5142                 memcpy(&cache->key, &found_key, sizeof(found_key));
5143
5144                 key.objectid = found_key.objectid + found_key.offset;
5145                 btrfs_release_path(root, path);
5146                 cache->flags = btrfs_block_group_flags(&cache->item);
5147
5148                 ret = update_space_info(info, cache->flags, found_key.offset,
5149                                         btrfs_block_group_used(&cache->item),
5150                                         &space_info);
5151                 BUG_ON(ret);
5152                 cache->space_info = space_info;
5153                 spin_lock(&space_info->lock);
5154                 list_add(&cache->list, &space_info->block_groups);
5155                 spin_unlock(&space_info->lock);
5156
5157                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
5158                 BUG_ON(ret);
5159
5160                 set_avail_alloc_bits(root->fs_info, cache->flags);
5161         }
5162         ret = 0;
5163 error:
5164         btrfs_free_path(path);
5165         mutex_unlock(&root->fs_info->alloc_mutex);
5166         return ret;
5167 }
5168
/*
 * Create a new block group for the chunk at @chunk_offset of @size
 * bytes with allocation profile @type, and record it both in memory
 * (space_info list, block group rb-tree) and on disk (extent tree
 * item).  Caller must hold fs_info->alloc_mutex.
 */
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, u64 bytes_used,
			   u64 type, u64 chunk_objectid, u64 chunk_offset,
			   u64 size)
{
	int ret;
	struct btrfs_root *extent_root;
	struct btrfs_block_group_cache *cache;

	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
	extent_root = root->fs_info->extent_root;

	/* remember that this transaction created a block group */
	root->fs_info->last_trans_new_blockgroup = trans->transid;

	cache = kzalloc(sizeof(*cache), GFP_NOFS);
	if (!cache)
		return -ENOMEM;

	cache->key.objectid = chunk_offset;
	cache->key.offset = size;
	spin_lock_init(&cache->lock);
	INIT_LIST_HEAD(&cache->list);
	btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	btrfs_set_block_group_used(&cache->item, bytes_used);
	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
	cache->flags = type;
	btrfs_set_block_group_flags(&cache->item, type);

	/* account the new capacity in the matching space_info */
	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
				&cache->space_info);
	BUG_ON(ret);
	spin_lock(&cache->space_info->lock);
	list_add(&cache->list, &cache->space_info->block_groups);
	spin_unlock(&cache->space_info->lock);

	ret = btrfs_add_block_group_cache(root->fs_info, cache);
	BUG_ON(ret);

	/* persist the block group item in the extent tree */
	ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
				sizeof(cache->item));
	BUG_ON(ret);

	/* flush extent tree updates queued by the insertion */
	finish_current_insert(trans, extent_root);
	ret = del_pending_extents(trans, extent_root);
	BUG_ON(ret);
	set_avail_alloc_bits(extent_root->fs_info, type);

	return 0;
}
5219
/*
 * Remove the (now empty) block group starting at @group_start: drop its
 * free space cache, unlink it from the in-memory structures, and delete
 * its item from the extent tree.  Caller must hold alloc_mutex.
 *
 * NOTE(review): rb_erase() is called here without taking
 * block_group_cache_lock (compare btrfs_free_block_groups()), and the
 * cache struct itself is never kfree()d — see the commented-out code
 * below.  Both look intentional-for-now but worth confirming.
 */
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start)
{
	struct btrfs_path *path;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_key key;
	int ret;

	BUG_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
	BUG_ON(!block_group);

	memcpy(&key, &block_group->key, sizeof(key));

	path = btrfs_alloc_path();
	BUG_ON(!path);

	/* unlink the group from all in-memory bookkeeping */
	btrfs_remove_free_space_cache(block_group);
	rb_erase(&block_group->cache_node,
		 &root->fs_info->block_group_cache_tree);
	spin_lock(&block_group->space_info->lock);
	list_del(&block_group->list);
	spin_unlock(&block_group->space_info->lock);

	/*
	memset(shrink_block_group, 0, sizeof(*shrink_block_group));
	kfree(shrink_block_group);
	*/

	/* delete the block group item from the extent tree */
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -EIO;	/* the item must exist */
	if (ret < 0)
		goto out;

	ret = btrfs_del_item(trans, root, path);
out:
	btrfs_free_path(path);
	return ret;
}