Btrfs: update nodatacow code v2
[safe/jmp/linux-2.6] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include "hash.h"
23 #include "crc32c.h"
24 #include "ctree.h"
25 #include "disk-io.h"
26 #include "print-tree.h"
27 #include "transaction.h"
28 #include "volumes.h"
29 #include "locking.h"
30 #include "ref-cache.h"
31
32 #define PENDING_EXTENT_INSERT 0
33 #define PENDING_EXTENT_DELETE 1
34 #define PENDING_BACKREF_UPDATE 2
35
36 struct pending_extent_op {
37         int type;
38         u64 bytenr;
39         u64 num_bytes;
40         u64 parent;
41         u64 orig_parent;
42         u64 generation;
43         u64 orig_generation;
44         int level;
45 };
46
47 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
48                                  btrfs_root *extent_root, int all);
49 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
50                                btrfs_root *extent_root, int all);
51 static struct btrfs_block_group_cache *
52 __btrfs_find_block_group(struct btrfs_root *root,
53                          struct btrfs_block_group_cache *hint,
54                          u64 search_start, int data, int owner);
55
56 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
57 {
58         return (cache->flags & bits) == bits;
59 }
60
61 /*
62  * this adds the block group to the fs_info rb tree for the block group
63  * cache
64  */
65 int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
66                                 struct btrfs_block_group_cache *block_group)
67 {
68         struct rb_node **p;
69         struct rb_node *parent = NULL;
70         struct btrfs_block_group_cache *cache;
71
72         spin_lock(&info->block_group_cache_lock);
73         p = &info->block_group_cache_tree.rb_node;
74
75         while (*p) {
76                 parent = *p;
77                 cache = rb_entry(parent, struct btrfs_block_group_cache,
78                                  cache_node);
79                 if (block_group->key.objectid < cache->key.objectid) {
80                         p = &(*p)->rb_left;
81                 } else if (block_group->key.objectid > cache->key.objectid) {
82                         p = &(*p)->rb_right;
83                 } else {
84                         spin_unlock(&info->block_group_cache_lock);
85                         return -EEXIST;
86                 }
87         }
88
89         rb_link_node(&block_group->cache_node, parent, p);
90         rb_insert_color(&block_group->cache_node,
91                         &info->block_group_cache_tree);
92         spin_unlock(&info->block_group_cache_lock);
93
94         return 0;
95 }
96
97 /*
98  * This will return the block group at or after bytenr if contains is 0, else
99  * it will return the block group that contains the bytenr
100  */
101 static struct btrfs_block_group_cache *
102 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
103                               int contains)
104 {
105         struct btrfs_block_group_cache *cache, *ret = NULL;
106         struct rb_node *n;
107         u64 end, start;
108
109         spin_lock(&info->block_group_cache_lock);
110         n = info->block_group_cache_tree.rb_node;
111
112         while (n) {
113                 cache = rb_entry(n, struct btrfs_block_group_cache,
114                                  cache_node);
115                 end = cache->key.objectid + cache->key.offset - 1;
116                 start = cache->key.objectid;
117
118                 if (bytenr < start) {
119                         if (!contains && (!ret || start < ret->key.objectid))
120                                 ret = cache;
121                         n = n->rb_left;
122                 } else if (bytenr > start) {
123                         if (contains && bytenr <= end) {
124                                 ret = cache;
125                                 break;
126                         }
127                         n = n->rb_right;
128                 } else {
129                         ret = cache;
130                         break;
131                 }
132         }
133         spin_unlock(&info->block_group_cache_lock);
134
135         return ret;
136 }
137
138 /*
139  * this is only called by cache_block_group, since we could have freed extents
140  * we need to check the pinned_extents for any extents that can't be used yet
141  * since their free space will be released as soon as the transaction commits.
142  */
143 static int add_new_free_space(struct btrfs_block_group_cache *block_group,
144                               struct btrfs_fs_info *info, u64 start, u64 end)
145 {
146         u64 extent_start, extent_end, size;
147         int ret;
148
149         mutex_lock(&info->pinned_mutex);
150         while (start < end) {
151                 ret = find_first_extent_bit(&info->pinned_extents, start,
152                                             &extent_start, &extent_end,
153                                             EXTENT_DIRTY);
154                 if (ret)
155                         break;
156
157                 if (extent_start == start) {
158                         start = extent_end + 1;
159                 } else if (extent_start > start && extent_start < end) {
160                         size = extent_start - start;
161                         ret = btrfs_add_free_space_lock(block_group, start,
162                                                         size);
163                         BUG_ON(ret);
164                         start = extent_end + 1;
165                 } else {
166                         break;
167                 }
168         }
169
170         if (start < end) {
171                 size = end - start;
172                 ret = btrfs_add_free_space_lock(block_group, start, size);
173                 BUG_ON(ret);
174         }
175         mutex_unlock(&info->pinned_mutex);
176
177         return 0;
178 }
179
180 static int cache_block_group(struct btrfs_root *root,
181                              struct btrfs_block_group_cache *block_group)
182 {
183         struct btrfs_path *path;
184         int ret = 0;
185         struct btrfs_key key;
186         struct extent_buffer *leaf;
187         int slot;
188         u64 last = 0;
189         u64 first_free;
190         int found = 0;
191
192         if (!block_group)
193                 return 0;
194
195         root = root->fs_info->extent_root;
196
197         if (block_group->cached)
198                 return 0;
199
200         path = btrfs_alloc_path();
201         if (!path)
202                 return -ENOMEM;
203
204         path->reada = 2;
205         /*
206          * we get into deadlocks with paths held by callers of this function.
207          * since the alloc_mutex is protecting things right now, just
208          * skip the locking here
209          */
210         path->skip_locking = 1;
211         first_free = max_t(u64, block_group->key.objectid,
212                            BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
213         key.objectid = block_group->key.objectid;
214         key.offset = 0;
215         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
216         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
217         if (ret < 0)
218                 goto err;
219         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
220         if (ret < 0)
221                 goto err;
222         if (ret == 0) {
223                 leaf = path->nodes[0];
224                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
225                 if (key.objectid + key.offset > first_free)
226                         first_free = key.objectid + key.offset;
227         }
228         while(1) {
229                 leaf = path->nodes[0];
230                 slot = path->slots[0];
231                 if (slot >= btrfs_header_nritems(leaf)) {
232                         ret = btrfs_next_leaf(root, path);
233                         if (ret < 0)
234                                 goto err;
235                         if (ret == 0)
236                                 continue;
237                         else
238                                 break;
239                 }
240                 btrfs_item_key_to_cpu(leaf, &key, slot);
241                 if (key.objectid < block_group->key.objectid)
242                         goto next;
243
244                 if (key.objectid >= block_group->key.objectid +
245                     block_group->key.offset)
246                         break;
247
248                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
249                         if (!found) {
250                                 last = first_free;
251                                 found = 1;
252                         }
253
254                         add_new_free_space(block_group, root->fs_info, last,
255                                            key.objectid);
256
257                         last = key.objectid + key.offset;
258                 }
259 next:
260                 path->slots[0]++;
261         }
262
263         if (!found)
264                 last = first_free;
265
266         add_new_free_space(block_group, root->fs_info, last,
267                            block_group->key.objectid +
268                            block_group->key.offset);
269
270         block_group->cached = 1;
271         ret = 0;
272 err:
273         btrfs_free_path(path);
274         return ret;
275 }
276
277 /*
278  * return the block group that starts at or after bytenr
279  */
280 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
281                                                        btrfs_fs_info *info,
282                                                          u64 bytenr)
283 {
284         struct btrfs_block_group_cache *cache;
285
286         cache = block_group_cache_tree_search(info, bytenr, 0);
287
288         return cache;
289 }
290
291 /*
292  * return the block group that contains teh given bytenr
293  */
294 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
295                                                          btrfs_fs_info *info,
296                                                          u64 bytenr)
297 {
298         struct btrfs_block_group_cache *cache;
299
300         cache = block_group_cache_tree_search(info, bytenr, 1);
301
302         return cache;
303 }
304
305 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
306                                                   u64 flags)
307 {
308         struct list_head *head = &info->space_info;
309         struct list_head *cur;
310         struct btrfs_space_info *found;
311         list_for_each(cur, head) {
312                 found = list_entry(cur, struct btrfs_space_info, list);
313                 if (found->flags == flags)
314                         return found;
315         }
316         return NULL;
317 }
318
319 static u64 div_factor(u64 num, int factor)
320 {
321         if (factor == 10)
322                 return num;
323         num *= factor;
324         do_div(num, 10);
325         return num;
326 }
327
328 static struct btrfs_block_group_cache *
329 __btrfs_find_block_group(struct btrfs_root *root,
330                          struct btrfs_block_group_cache *hint,
331                          u64 search_start, int data, int owner)
332 {
333         struct btrfs_block_group_cache *cache;
334         struct btrfs_block_group_cache *found_group = NULL;
335         struct btrfs_fs_info *info = root->fs_info;
336         u64 used;
337         u64 last = 0;
338         u64 free_check;
339         int full_search = 0;
340         int factor = 10;
341         int wrapped = 0;
342
343         if (data & BTRFS_BLOCK_GROUP_METADATA)
344                 factor = 9;
345
346         if (search_start) {
347                 struct btrfs_block_group_cache *shint;
348                 shint = btrfs_lookup_first_block_group(info, search_start);
349                 if (shint && block_group_bits(shint, data) && !shint->ro) {
350                         spin_lock(&shint->lock);
351                         used = btrfs_block_group_used(&shint->item);
352                         if (used + shint->pinned + shint->reserved <
353                             div_factor(shint->key.offset, factor)) {
354                                 spin_unlock(&shint->lock);
355                                 return shint;
356                         }
357                         spin_unlock(&shint->lock);
358                 }
359         }
360         if (hint && !hint->ro && block_group_bits(hint, data)) {
361                 spin_lock(&hint->lock);
362                 used = btrfs_block_group_used(&hint->item);
363                 if (used + hint->pinned + hint->reserved <
364                     div_factor(hint->key.offset, factor)) {
365                         spin_unlock(&hint->lock);
366                         return hint;
367                 }
368                 spin_unlock(&hint->lock);
369                 last = hint->key.objectid + hint->key.offset;
370         } else {
371                 if (hint)
372                         last = max(hint->key.objectid, search_start);
373                 else
374                         last = search_start;
375         }
376 again:
377         while (1) {
378                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
379                 if (!cache)
380                         break;
381
382                 spin_lock(&cache->lock);
383                 last = cache->key.objectid + cache->key.offset;
384                 used = btrfs_block_group_used(&cache->item);
385
386                 if (!cache->ro && block_group_bits(cache, data)) {
387                         free_check = div_factor(cache->key.offset, factor);
388                         if (used + cache->pinned + cache->reserved <
389                             free_check) {
390                                 found_group = cache;
391                                 spin_unlock(&cache->lock);
392                                 goto found;
393                         }
394                 }
395                 spin_unlock(&cache->lock);
396                 cond_resched();
397         }
398         if (!wrapped) {
399                 last = search_start;
400                 wrapped = 1;
401                 goto again;
402         }
403         if (!full_search && factor < 10) {
404                 last = search_start;
405                 full_search = 1;
406                 factor = 10;
407                 goto again;
408         }
409 found:
410         return found_group;
411 }
412
413 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
414                                                  struct btrfs_block_group_cache
415                                                  *hint, u64 search_start,
416                                                  int data, int owner)
417 {
418
419         struct btrfs_block_group_cache *ret;
420         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
421         return ret;
422 }
423
424 /* simple helper to search for an existing extent at a given offset */
425 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
426 {
427         int ret;
428         struct btrfs_key key;
429         struct btrfs_path *path;
430
431         path = btrfs_alloc_path();
432         BUG_ON(!path);
433         key.objectid = start;
434         key.offset = len;
435         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
436         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
437                                 0, 0);
438         btrfs_free_path(path);
439         return ret;
440 }
441
442 /*
443  * Back reference rules.  Back refs have three main goals:
444  *
445  * 1) differentiate between all holders of references to an extent so that
446  *    when a reference is dropped we can make sure it was a valid reference
447  *    before freeing the extent.
448  *
449  * 2) Provide enough information to quickly find the holders of an extent
450  *    if we notice a given block is corrupted or bad.
451  *
452  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
453  *    maintenance.  This is actually the same as #2, but with a slightly
454  *    different use case.
455  *
456  * File extents can be referenced by:
457  *
458  * - multiple snapshots, subvolumes, or different generations in one subvol
459  * - different files inside a single subvolume
460  * - different offsets inside a file (bookend extents in file.c)
461  *
462  * The extent ref structure has fields for:
463  *
464  * - Objectid of the subvolume root
465  * - Generation number of the tree holding the reference
466  * - objectid of the file holding the reference
467  * - number of references holding by parent node (alway 1 for tree blocks)
468  *
469  * Btree leaf may hold multiple references to a file extent. In most cases,
470  * these references are from same file and the corresponding offsets inside
471  * the file are close together.
472  *
473  * When a file extent is allocated the fields are filled in:
474  *     (root_key.objectid, trans->transid, inode objectid, 1)
475  *
476  * When a leaf is cow'd new references are added for every file extent found
477  * in the leaf.  It looks similar to the create case, but trans->transid will
478  * be different when the block is cow'd.
479  *
480  *     (root_key.objectid, trans->transid, inode objectid,
481  *      number of references in the leaf)
482  *
483  * When a file extent is removed either during snapshot deletion or
484  * file truncation, we find the corresponding back reference and check
485  * the following fields:
486  *
487  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
488  *      inode objectid)
489  *
490  * Btree extents can be referenced by:
491  *
492  * - Different subvolumes
493  * - Different generations of the same subvolume
494  *
495  * When a tree block is created, back references are inserted:
496  *
497  * (root->root_key.objectid, trans->transid, level, 1)
498  *
499  * When a tree block is cow'd, new back references are added for all the
500  * blocks it points to. If the tree block isn't in reference counted root,
501  * the old back references are removed. These new back references are of
502  * the form (trans->transid will have increased since creation):
503  *
504  * (root->root_key.objectid, trans->transid, level, 1)
505  *
506  * When a backref is in deleting, the following fields are checked:
507  *
508  * if backref was for a tree root:
509  *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
510  * else
511  *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
512  *
513  * Back Reference Key composing:
514  *
515  * The key objectid corresponds to the first byte in the extent, the key
516  * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
517  * byte of parent extent. If a extent is tree root, the key offset is set
518  * to the key objectid.
519  */
520
521 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
522                                           struct btrfs_root *root,
523                                           struct btrfs_path *path,
524                                           u64 bytenr, u64 parent,
525                                           u64 ref_root, u64 ref_generation,
526                                           u64 owner_objectid, int del)
527 {
528         struct btrfs_key key;
529         struct btrfs_extent_ref *ref;
530         struct extent_buffer *leaf;
531         u64 ref_objectid;
532         int ret;
533
534         key.objectid = bytenr;
535         key.type = BTRFS_EXTENT_REF_KEY;
536         key.offset = parent;
537
538         ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
539         if (ret < 0)
540                 goto out;
541         if (ret > 0) {
542                 ret = -ENOENT;
543                 goto out;
544         }
545
546         leaf = path->nodes[0];
547         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
548         ref_objectid = btrfs_ref_objectid(leaf, ref);
549         if (btrfs_ref_root(leaf, ref) != ref_root ||
550             btrfs_ref_generation(leaf, ref) != ref_generation ||
551             (ref_objectid != owner_objectid &&
552              ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
553                 ret = -EIO;
554                 WARN_ON(1);
555                 goto out;
556         }
557         ret = 0;
558 out:
559         return ret;
560 }
561
562 static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
563                                           struct btrfs_root *root,
564                                           struct btrfs_path *path,
565                                           u64 bytenr, u64 parent,
566                                           u64 ref_root, u64 ref_generation,
567                                           u64 owner_objectid)
568 {
569         struct btrfs_key key;
570         struct extent_buffer *leaf;
571         struct btrfs_extent_ref *ref;
572         u32 num_refs;
573         int ret;
574
575         key.objectid = bytenr;
576         key.type = BTRFS_EXTENT_REF_KEY;
577         key.offset = parent;
578
579         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
580         if (ret == 0) {
581                 leaf = path->nodes[0];
582                 ref = btrfs_item_ptr(leaf, path->slots[0],
583                                      struct btrfs_extent_ref);
584                 btrfs_set_ref_root(leaf, ref, ref_root);
585                 btrfs_set_ref_generation(leaf, ref, ref_generation);
586                 btrfs_set_ref_objectid(leaf, ref, owner_objectid);
587                 btrfs_set_ref_num_refs(leaf, ref, 1);
588         } else if (ret == -EEXIST) {
589                 u64 existing_owner;
590                 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
591                 leaf = path->nodes[0];
592                 ref = btrfs_item_ptr(leaf, path->slots[0],
593                                      struct btrfs_extent_ref);
594                 if (btrfs_ref_root(leaf, ref) != ref_root ||
595                     btrfs_ref_generation(leaf, ref) != ref_generation) {
596                         ret = -EIO;
597                         WARN_ON(1);
598                         goto out;
599                 }
600
601                 num_refs = btrfs_ref_num_refs(leaf, ref);
602                 BUG_ON(num_refs == 0);
603                 btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);
604
605                 existing_owner = btrfs_ref_objectid(leaf, ref);
606                 if (existing_owner != owner_objectid &&
607                     existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
608                         btrfs_set_ref_objectid(leaf, ref,
609                                         BTRFS_MULTIPLE_OBJECTIDS);
610                 }
611                 ret = 0;
612         } else {
613                 goto out;
614         }
615         btrfs_mark_buffer_dirty(path->nodes[0]);
616 out:
617         btrfs_release_path(root, path);
618         return ret;
619 }
620
621 static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
622                                           struct btrfs_root *root,
623                                           struct btrfs_path *path)
624 {
625         struct extent_buffer *leaf;
626         struct btrfs_extent_ref *ref;
627         u32 num_refs;
628         int ret = 0;
629
630         leaf = path->nodes[0];
631         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
632         num_refs = btrfs_ref_num_refs(leaf, ref);
633         BUG_ON(num_refs == 0);
634         num_refs -= 1;
635         if (num_refs == 0) {
636                 ret = btrfs_del_item(trans, root, path);
637         } else {
638                 btrfs_set_ref_num_refs(leaf, ref, num_refs);
639                 btrfs_mark_buffer_dirty(leaf);
640         }
641         btrfs_release_path(root, path);
642         return ret;
643 }
644
645 static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
646                                      struct btrfs_root *root, u64 bytenr,
647                                      u64 orig_parent, u64 parent,
648                                      u64 orig_root, u64 ref_root,
649                                      u64 orig_generation, u64 ref_generation,
650                                      u64 owner_objectid)
651 {
652         int ret;
653         struct btrfs_root *extent_root = root->fs_info->extent_root;
654         struct btrfs_path *path;
655
656         if (root == root->fs_info->extent_root) {
657                 struct pending_extent_op *extent_op;
658                 u64 num_bytes;
659
660                 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
661                 num_bytes = btrfs_level_size(root, (int)owner_objectid);
662                 mutex_lock(&root->fs_info->extent_ins_mutex);
663                 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
664                                 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
665                         u64 priv;
666                         ret = get_state_private(&root->fs_info->extent_ins,
667                                                 bytenr, &priv);
668                         BUG_ON(ret);
669                         extent_op = (struct pending_extent_op *)
670                                                         (unsigned long)priv;
671                         BUG_ON(extent_op->parent != orig_parent);
672                         BUG_ON(extent_op->generation != orig_generation);
673
674                         extent_op->parent = parent;
675                         extent_op->generation = ref_generation;
676                 } else {
677                         extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
678                         BUG_ON(!extent_op);
679
680                         extent_op->type = PENDING_BACKREF_UPDATE;
681                         extent_op->bytenr = bytenr;
682                         extent_op->num_bytes = num_bytes;
683                         extent_op->parent = parent;
684                         extent_op->orig_parent = orig_parent;
685                         extent_op->generation = ref_generation;
686                         extent_op->orig_generation = orig_generation;
687                         extent_op->level = (int)owner_objectid;
688
689                         set_extent_bits(&root->fs_info->extent_ins,
690                                         bytenr, bytenr + num_bytes - 1,
691                                         EXTENT_WRITEBACK, GFP_NOFS);
692                         set_state_private(&root->fs_info->extent_ins,
693                                           bytenr, (unsigned long)extent_op);
694                 }
695                 mutex_unlock(&root->fs_info->extent_ins_mutex);
696                 return 0;
697         }
698
699         path = btrfs_alloc_path();
700         if (!path)
701                 return -ENOMEM;
702         ret = lookup_extent_backref(trans, extent_root, path,
703                                     bytenr, orig_parent, orig_root,
704                                     orig_generation, owner_objectid, 1);
705         if (ret)
706                 goto out;
707         ret = remove_extent_backref(trans, extent_root, path);
708         if (ret)
709                 goto out;
710         ret = insert_extent_backref(trans, extent_root, path, bytenr,
711                                     parent, ref_root, ref_generation,
712                                     owner_objectid);
713         BUG_ON(ret);
714         finish_current_insert(trans, extent_root, 0);
715         del_pending_extents(trans, extent_root, 0);
716 out:
717         btrfs_free_path(path);
718         return ret;
719 }
720
721 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
722                             struct btrfs_root *root, u64 bytenr,
723                             u64 orig_parent, u64 parent,
724                             u64 ref_root, u64 ref_generation,
725                             u64 owner_objectid)
726 {
727         int ret;
728         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
729             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
730                 return 0;
731         ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
732                                         parent, ref_root, ref_root,
733                                         ref_generation, ref_generation,
734                                         owner_objectid);
735         return ret;
736 }
737
738 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
739                                   struct btrfs_root *root, u64 bytenr,
740                                   u64 orig_parent, u64 parent,
741                                   u64 orig_root, u64 ref_root,
742                                   u64 orig_generation, u64 ref_generation,
743                                   u64 owner_objectid)
744 {
745         struct btrfs_path *path;
746         int ret;
747         struct btrfs_key key;
748         struct extent_buffer *l;
749         struct btrfs_extent_item *item;
750         u32 refs;
751
752         path = btrfs_alloc_path();
753         if (!path)
754                 return -ENOMEM;
755
756         path->reada = 1;
757         key.objectid = bytenr;
758         key.type = BTRFS_EXTENT_ITEM_KEY;
759         key.offset = (u64)-1;
760
761         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
762                                 0, 1);
763         if (ret < 0)
764                 return ret;
765         BUG_ON(ret == 0 || path->slots[0] == 0);
766
767         path->slots[0]--;
768         l = path->nodes[0];
769
770         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
771         BUG_ON(key.objectid != bytenr);
772         BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
773
774         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
775         refs = btrfs_extent_refs(l, item);
776         btrfs_set_extent_refs(l, item, refs + 1);
777         btrfs_mark_buffer_dirty(path->nodes[0]);
778
779         btrfs_release_path(root->fs_info->extent_root, path);
780
781         path->reada = 1;
782         ret = insert_extent_backref(trans, root->fs_info->extent_root,
783                                     path, bytenr, parent,
784                                     ref_root, ref_generation,
785                                     owner_objectid);
786         BUG_ON(ret);
787         finish_current_insert(trans, root->fs_info->extent_root, 0);
788         del_pending_extents(trans, root->fs_info->extent_root, 0);
789
790         btrfs_free_path(path);
791         return 0;
792 }
793
794 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
795                          struct btrfs_root *root,
796                          u64 bytenr, u64 num_bytes, u64 parent,
797                          u64 ref_root, u64 ref_generation,
798                          u64 owner_objectid)
799 {
800         int ret;
801         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
802             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
803                 return 0;
804         ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
805                                      0, ref_root, 0, ref_generation,
806                                      owner_objectid);
807         return ret;
808 }
809
810 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
811                          struct btrfs_root *root)
812 {
813         finish_current_insert(trans, root->fs_info->extent_root, 1);
814         del_pending_extents(trans, root->fs_info->extent_root, 1);
815         return 0;
816 }
817
818 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
819                             struct btrfs_root *root, u64 bytenr,
820                             u64 num_bytes, u32 *refs)
821 {
822         struct btrfs_path *path;
823         int ret;
824         struct btrfs_key key;
825         struct extent_buffer *l;
826         struct btrfs_extent_item *item;
827
828         WARN_ON(num_bytes < root->sectorsize);
829         path = btrfs_alloc_path();
830         path->reada = 1;
831         key.objectid = bytenr;
832         key.offset = num_bytes;
833         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
834         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
835                                 0, 0);
836         if (ret < 0)
837                 goto out;
838         if (ret != 0) {
839                 btrfs_print_leaf(root, path->nodes[0]);
840                 printk("failed to find block number %Lu\n", bytenr);
841                 BUG();
842         }
843         l = path->nodes[0];
844         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
845         *refs = btrfs_extent_refs(l, item);
846 out:
847         btrfs_free_path(path);
848         return 0;
849 }
850
851 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
852                           struct btrfs_root *root, u64 bytenr)
853 {
854         struct btrfs_root *extent_root = root->fs_info->extent_root;
855         struct btrfs_path *path;
856         struct extent_buffer *leaf;
857         struct btrfs_extent_ref *ref_item;
858         struct btrfs_key key;
859         struct btrfs_key found_key;
860         u64 ref_root;
861         u64 last_snapshot;
862         u32 nritems;
863         int ret;
864
865         key.objectid = bytenr;
866         key.offset = (u64)-1;
867         key.type = BTRFS_EXTENT_ITEM_KEY;
868
869         path = btrfs_alloc_path();
870         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
871         if (ret < 0)
872                 goto out;
873         BUG_ON(ret == 0);
874
875         ret = -ENOENT;
876         if (path->slots[0] == 0)
877                 goto out;
878
879         path->slots[0]--;
880         leaf = path->nodes[0];
881         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
882
883         if (found_key.objectid != bytenr ||
884             found_key.type != BTRFS_EXTENT_ITEM_KEY)
885                 goto out;
886
887         last_snapshot = btrfs_root_last_snapshot(&root->root_item);
888         while (1) {
889                 leaf = path->nodes[0];
890                 nritems = btrfs_header_nritems(leaf);
891                 if (path->slots[0] >= nritems) {
892                         ret = btrfs_next_leaf(extent_root, path);
893                         if (ret < 0)
894                                 goto out;
895                         if (ret == 0)
896                                 continue;
897                         break;
898                 }
899                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
900                 if (found_key.objectid != bytenr)
901                         break;
902
903                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
904                         path->slots[0]++;
905                         continue;
906                 }
907
908                 ref_item = btrfs_item_ptr(leaf, path->slots[0],
909                                           struct btrfs_extent_ref);
910                 ref_root = btrfs_ref_root(leaf, ref_item);
911                 if (ref_root != root->root_key.objectid &&
912                     ref_root != BTRFS_TREE_LOG_OBJECTID) {
913                         ret = 1;
914                         goto out;
915                 }
916                 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
917                         ret = 1;
918                         goto out;
919                 }
920
921                 path->slots[0]++;
922         }
923         ret = 0;
924 out:
925         btrfs_free_path(path);
926         return ret;
927 }
928
929 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
930                     struct extent_buffer *buf, u32 nr_extents)
931 {
932         struct btrfs_key key;
933         struct btrfs_file_extent_item *fi;
934         u64 root_gen;
935         u32 nritems;
936         int i;
937         int level;
938         int ret = 0;
939         int shared = 0;
940
941         if (!root->ref_cows)
942                 return 0;
943
944         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
945                 shared = 0;
946                 root_gen = root->root_key.offset;
947         } else {
948                 shared = 1;
949                 root_gen = trans->transid - 1;
950         }
951
952         level = btrfs_header_level(buf);
953         nritems = btrfs_header_nritems(buf);
954
955         if (level == 0) {
956                 struct btrfs_leaf_ref *ref;
957                 struct btrfs_extent_info *info;
958
959                 ref = btrfs_alloc_leaf_ref(root, nr_extents);
960                 if (!ref) {
961                         ret = -ENOMEM;
962                         goto out;
963                 }
964
965                 ref->root_gen = root_gen;
966                 ref->bytenr = buf->start;
967                 ref->owner = btrfs_header_owner(buf);
968                 ref->generation = btrfs_header_generation(buf);
969                 ref->nritems = nr_extents;
970                 info = ref->extents;
971
972                 for (i = 0; nr_extents > 0 && i < nritems; i++) {
973                         u64 disk_bytenr;
974                         btrfs_item_key_to_cpu(buf, &key, i);
975                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
976                                 continue;
977                         fi = btrfs_item_ptr(buf, i,
978                                             struct btrfs_file_extent_item);
979                         if (btrfs_file_extent_type(buf, fi) ==
980                             BTRFS_FILE_EXTENT_INLINE)
981                                 continue;
982                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
983                         if (disk_bytenr == 0)
984                                 continue;
985
986                         info->bytenr = disk_bytenr;
987                         info->num_bytes =
988                                 btrfs_file_extent_disk_num_bytes(buf, fi);
989                         info->objectid = key.objectid;
990                         info->offset = key.offset;
991                         info++;
992                 }
993
994                 ret = btrfs_add_leaf_ref(root, ref, shared);
995                 if (ret == -EEXIST && shared) {
996                         struct btrfs_leaf_ref *old;
997                         old = btrfs_lookup_leaf_ref(root, ref->bytenr);
998                         BUG_ON(!old);
999                         btrfs_remove_leaf_ref(root, old);
1000                         btrfs_free_leaf_ref(root, old);
1001                         ret = btrfs_add_leaf_ref(root, ref, shared);
1002                 }
1003                 WARN_ON(ret);
1004                 btrfs_free_leaf_ref(root, ref);
1005         }
1006 out:
1007         return ret;
1008 }
1009
1010 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1011                   struct extent_buffer *orig_buf, struct extent_buffer *buf,
1012                   u32 *nr_extents)
1013 {
1014         u64 bytenr;
1015         u64 ref_root;
1016         u64 orig_root;
1017         u64 ref_generation;
1018         u64 orig_generation;
1019         u32 nritems;
1020         u32 nr_file_extents = 0;
1021         struct btrfs_key key;
1022         struct btrfs_file_extent_item *fi;
1023         int i;
1024         int level;
1025         int ret = 0;
1026         int faili = 0;
1027         int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1028                             u64, u64, u64, u64, u64, u64, u64, u64);
1029
1030         ref_root = btrfs_header_owner(buf);
1031         ref_generation = btrfs_header_generation(buf);
1032         orig_root = btrfs_header_owner(orig_buf);
1033         orig_generation = btrfs_header_generation(orig_buf);
1034
1035         nritems = btrfs_header_nritems(buf);
1036         level = btrfs_header_level(buf);
1037
1038         if (root->ref_cows) {
1039                 process_func = __btrfs_inc_extent_ref;
1040         } else {
1041                 if (level == 0 &&
1042                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1043                         goto out;
1044                 if (level != 0 &&
1045                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1046                         goto out;
1047                 process_func = __btrfs_update_extent_ref;
1048         }
1049
1050         for (i = 0; i < nritems; i++) {
1051                 cond_resched();
1052                 if (level == 0) {
1053                         btrfs_item_key_to_cpu(buf, &key, i);
1054                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1055                                 continue;
1056                         fi = btrfs_item_ptr(buf, i,
1057                                             struct btrfs_file_extent_item);
1058                         if (btrfs_file_extent_type(buf, fi) ==
1059                             BTRFS_FILE_EXTENT_INLINE)
1060                                 continue;
1061                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1062                         if (bytenr == 0)
1063                                 continue;
1064
1065                         nr_file_extents++;
1066
1067                         ret = process_func(trans, root, bytenr,
1068                                            orig_buf->start, buf->start,
1069                                            orig_root, ref_root,
1070                                            orig_generation, ref_generation,
1071                                            key.objectid);
1072
1073                         if (ret) {
1074                                 faili = i;
1075                                 WARN_ON(1);
1076                                 goto fail;
1077                         }
1078                 } else {
1079                         bytenr = btrfs_node_blockptr(buf, i);
1080                         ret = process_func(trans, root, bytenr,
1081                                            orig_buf->start, buf->start,
1082                                            orig_root, ref_root,
1083                                            orig_generation, ref_generation,
1084                                            level - 1);
1085                         if (ret) {
1086                                 faili = i;
1087                                 WARN_ON(1);
1088                                 goto fail;
1089                         }
1090                 }
1091         }
1092 out:
1093         if (nr_extents) {
1094                 if (level == 0)
1095                         *nr_extents = nr_file_extents;
1096                 else
1097                         *nr_extents = nritems;
1098         }
1099         return 0;
1100 fail:
1101         WARN_ON(1);
1102         return ret;
1103 }
1104
1105 int btrfs_update_ref(struct btrfs_trans_handle *trans,
1106                      struct btrfs_root *root, struct extent_buffer *orig_buf,
1107                      struct extent_buffer *buf, int start_slot, int nr)
1108
1109 {
1110         u64 bytenr;
1111         u64 ref_root;
1112         u64 orig_root;
1113         u64 ref_generation;
1114         u64 orig_generation;
1115         struct btrfs_key key;
1116         struct btrfs_file_extent_item *fi;
1117         int i;
1118         int ret;
1119         int slot;
1120         int level;
1121
1122         BUG_ON(start_slot < 0);
1123         BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1124
1125         ref_root = btrfs_header_owner(buf);
1126         ref_generation = btrfs_header_generation(buf);
1127         orig_root = btrfs_header_owner(orig_buf);
1128         orig_generation = btrfs_header_generation(orig_buf);
1129         level = btrfs_header_level(buf);
1130
1131         if (!root->ref_cows) {
1132                 if (level == 0 &&
1133                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1134                         return 0;
1135                 if (level != 0 &&
1136                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1137                         return 0;
1138         }
1139
1140         for (i = 0, slot = start_slot; i < nr; i++, slot++) {
1141                 cond_resched();
1142                 if (level == 0) {
1143                         btrfs_item_key_to_cpu(buf, &key, slot);
1144                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1145                                 continue;
1146                         fi = btrfs_item_ptr(buf, slot,
1147                                             struct btrfs_file_extent_item);
1148                         if (btrfs_file_extent_type(buf, fi) ==
1149                             BTRFS_FILE_EXTENT_INLINE)
1150                                 continue;
1151                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1152                         if (bytenr == 0)
1153                                 continue;
1154                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1155                                             orig_buf->start, buf->start,
1156                                             orig_root, ref_root,
1157                                             orig_generation, ref_generation,
1158                                             key.objectid);
1159                         if (ret)
1160                                 goto fail;
1161                 } else {
1162                         bytenr = btrfs_node_blockptr(buf, slot);
1163                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1164                                             orig_buf->start, buf->start,
1165                                             orig_root, ref_root,
1166                                             orig_generation, ref_generation,
1167                                             level - 1);
1168                         if (ret)
1169                                 goto fail;
1170                 }
1171         }
1172         return 0;
1173 fail:
1174         WARN_ON(1);
1175         return -1;
1176 }
1177
1178 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1179                                  struct btrfs_root *root,
1180                                  struct btrfs_path *path,
1181                                  struct btrfs_block_group_cache *cache)
1182 {
1183         int ret;
1184         int pending_ret;
1185         struct btrfs_root *extent_root = root->fs_info->extent_root;
1186         unsigned long bi;
1187         struct extent_buffer *leaf;
1188
1189         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1190         if (ret < 0)
1191                 goto fail;
1192         BUG_ON(ret);
1193
1194         leaf = path->nodes[0];
1195         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1196         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1197         btrfs_mark_buffer_dirty(leaf);
1198         btrfs_release_path(extent_root, path);
1199 fail:
1200         finish_current_insert(trans, extent_root, 0);
1201         pending_ret = del_pending_extents(trans, extent_root, 0);
1202         if (ret)
1203                 return ret;
1204         if (pending_ret)
1205                 return pending_ret;
1206         return 0;
1207
1208 }
1209
1210 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1211                                    struct btrfs_root *root)
1212 {
1213         struct btrfs_block_group_cache *cache, *entry;
1214         struct rb_node *n;
1215         int err = 0;
1216         int werr = 0;
1217         struct btrfs_path *path;
1218         u64 last = 0;
1219
1220         path = btrfs_alloc_path();
1221         if (!path)
1222                 return -ENOMEM;
1223
1224         while(1) {
1225                 cache = NULL;
1226                 spin_lock(&root->fs_info->block_group_cache_lock);
1227                 for (n = rb_first(&root->fs_info->block_group_cache_tree);
1228                      n; n = rb_next(n)) {
1229                         entry = rb_entry(n, struct btrfs_block_group_cache,
1230                                          cache_node);
1231                         if (entry->dirty) {
1232                                 cache = entry;
1233                                 break;
1234                         }
1235                 }
1236                 spin_unlock(&root->fs_info->block_group_cache_lock);
1237
1238                 if (!cache)
1239                         break;
1240
1241                 cache->dirty = 0;
1242                 last += cache->key.offset;
1243
1244                 err = write_one_cache_group(trans, root,
1245                                             path, cache);
1246                 /*
1247                  * if we fail to write the cache group, we want
1248                  * to keep it marked dirty in hopes that a later
1249                  * write will work
1250                  */
1251                 if (err) {
1252                         werr = err;
1253                         continue;
1254                 }
1255         }
1256         btrfs_free_path(path);
1257         return werr;
1258 }
1259
1260 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1261                              u64 total_bytes, u64 bytes_used,
1262                              struct btrfs_space_info **space_info)
1263 {
1264         struct btrfs_space_info *found;
1265
1266         found = __find_space_info(info, flags);
1267         if (found) {
1268                 spin_lock(&found->lock);
1269                 found->total_bytes += total_bytes;
1270                 found->bytes_used += bytes_used;
1271                 found->full = 0;
1272                 spin_unlock(&found->lock);
1273                 *space_info = found;
1274                 return 0;
1275         }
1276         found = kmalloc(sizeof(*found), GFP_NOFS);
1277         if (!found)
1278                 return -ENOMEM;
1279
1280         list_add(&found->list, &info->space_info);
1281         INIT_LIST_HEAD(&found->block_groups);
1282         init_rwsem(&found->groups_sem);
1283         spin_lock_init(&found->lock);
1284         found->flags = flags;
1285         found->total_bytes = total_bytes;
1286         found->bytes_used = bytes_used;
1287         found->bytes_pinned = 0;
1288         found->bytes_reserved = 0;
1289         found->full = 0;
1290         found->force_alloc = 0;
1291         *space_info = found;
1292         return 0;
1293 }
1294
1295 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1296 {
1297         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1298                                    BTRFS_BLOCK_GROUP_RAID1 |
1299                                    BTRFS_BLOCK_GROUP_RAID10 |
1300                                    BTRFS_BLOCK_GROUP_DUP);
1301         if (extra_flags) {
1302                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1303                         fs_info->avail_data_alloc_bits |= extra_flags;
1304                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1305                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1306                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1307                         fs_info->avail_system_alloc_bits |= extra_flags;
1308         }
1309 }
1310
1311 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1312 {
1313         u64 num_devices = root->fs_info->fs_devices->num_devices;
1314
1315         if (num_devices == 1)
1316                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1317         if (num_devices < 4)
1318                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1319
1320         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1321             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1322                       BTRFS_BLOCK_GROUP_RAID10))) {
1323                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1324         }
1325
1326         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1327             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1328                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1329         }
1330
1331         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1332             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1333              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1334              (flags & BTRFS_BLOCK_GROUP_DUP)))
1335                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1336         return flags;
1337 }
1338
1339 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1340                           struct btrfs_root *extent_root, u64 alloc_bytes,
1341                           u64 flags, int force)
1342 {
1343         struct btrfs_space_info *space_info;
1344         u64 thresh;
1345         u64 start;
1346         u64 num_bytes;
1347         int ret = 0, waited = 0;
1348
1349         flags = reduce_alloc_profile(extent_root, flags);
1350
1351         space_info = __find_space_info(extent_root->fs_info, flags);
1352         if (!space_info) {
1353                 ret = update_space_info(extent_root->fs_info, flags,
1354                                         0, 0, &space_info);
1355                 BUG_ON(ret);
1356         }
1357         BUG_ON(!space_info);
1358
1359         spin_lock(&space_info->lock);
1360         if (space_info->force_alloc) {
1361                 force = 1;
1362                 space_info->force_alloc = 0;
1363         }
1364         if (space_info->full) {
1365                 spin_unlock(&space_info->lock);
1366                 goto out;
1367         }
1368
1369         thresh = div_factor(space_info->total_bytes, 6);
1370         if (!force &&
1371            (space_info->bytes_used + space_info->bytes_pinned +
1372             space_info->bytes_reserved + alloc_bytes) < thresh) {
1373                 spin_unlock(&space_info->lock);
1374                 goto out;
1375         }
1376
1377         spin_unlock(&space_info->lock);
1378
1379         ret = mutex_trylock(&extent_root->fs_info->chunk_mutex);
1380         if (!ret && !force) {
1381                 goto out;
1382         } else if (!ret) {
1383                 mutex_lock(&extent_root->fs_info->chunk_mutex);
1384                 waited = 1;
1385         }
1386
1387         if (waited) {
1388                 spin_lock(&space_info->lock);
1389                 if (space_info->full) {
1390                         spin_unlock(&space_info->lock);
1391                         goto out_unlock;
1392                 }
1393                 spin_unlock(&space_info->lock);
1394         }
1395
1396         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1397         if (ret) {
1398 printk("space info full %Lu\n", flags);
1399                 space_info->full = 1;
1400                 goto out_unlock;
1401         }
1402
1403         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1404                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1405         BUG_ON(ret);
1406 out_unlock:
1407         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1408 out:
1409         return ret;
1410 }
1411
1412 static int update_block_group(struct btrfs_trans_handle *trans,
1413                               struct btrfs_root *root,
1414                               u64 bytenr, u64 num_bytes, int alloc,
1415                               int mark_free)
1416 {
1417         struct btrfs_block_group_cache *cache;
1418         struct btrfs_fs_info *info = root->fs_info;
1419         u64 total = num_bytes;
1420         u64 old_val;
1421         u64 byte_in_group;
1422
1423         while(total) {
1424                 cache = btrfs_lookup_block_group(info, bytenr);
1425                 if (!cache) {
1426                         return -1;
1427                 }
1428                 byte_in_group = bytenr - cache->key.objectid;
1429                 WARN_ON(byte_in_group > cache->key.offset);
1430
1431                 spin_lock(&cache->space_info->lock);
1432                 spin_lock(&cache->lock);
1433                 cache->dirty = 1;
1434                 old_val = btrfs_block_group_used(&cache->item);
1435                 num_bytes = min(total, cache->key.offset - byte_in_group);
1436                 if (alloc) {
1437                         old_val += num_bytes;
1438                         cache->space_info->bytes_used += num_bytes;
1439                         btrfs_set_block_group_used(&cache->item, old_val);
1440                         spin_unlock(&cache->lock);
1441                         spin_unlock(&cache->space_info->lock);
1442                 } else {
1443                         old_val -= num_bytes;
1444                         cache->space_info->bytes_used -= num_bytes;
1445                         btrfs_set_block_group_used(&cache->item, old_val);
1446                         spin_unlock(&cache->lock);
1447                         spin_unlock(&cache->space_info->lock);
1448                         if (mark_free) {
1449                                 int ret;
1450                                 ret = btrfs_add_free_space(cache, bytenr,
1451                                                            num_bytes);
1452                                 if (ret)
1453                                         return -1;
1454                         }
1455                 }
1456                 total -= num_bytes;
1457                 bytenr += num_bytes;
1458         }
1459         return 0;
1460 }
1461
1462 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1463 {
1464         struct btrfs_block_group_cache *cache;
1465
1466         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
1467         if (!cache)
1468                 return 0;
1469
1470         return cache->key.objectid;
1471 }
1472
1473 int btrfs_update_pinned_extents(struct btrfs_root *root,
1474                                 u64 bytenr, u64 num, int pin)
1475 {
1476         u64 len;
1477         struct btrfs_block_group_cache *cache;
1478         struct btrfs_fs_info *fs_info = root->fs_info;
1479
1480         WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex));
1481         if (pin) {
1482                 set_extent_dirty(&fs_info->pinned_extents,
1483                                 bytenr, bytenr + num - 1, GFP_NOFS);
1484         } else {
1485                 clear_extent_dirty(&fs_info->pinned_extents,
1486                                 bytenr, bytenr + num - 1, GFP_NOFS);
1487         }
1488         while (num > 0) {
1489                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1490                 BUG_ON(!cache);
1491                 len = min(num, cache->key.offset -
1492                           (bytenr - cache->key.objectid));
1493                 if (pin) {
1494                         spin_lock(&cache->space_info->lock);
1495                         spin_lock(&cache->lock);
1496                         cache->pinned += len;
1497                         cache->space_info->bytes_pinned += len;
1498                         spin_unlock(&cache->lock);
1499                         spin_unlock(&cache->space_info->lock);
1500                         fs_info->total_pinned += len;
1501                 } else {
1502                         spin_lock(&cache->space_info->lock);
1503                         spin_lock(&cache->lock);
1504                         cache->pinned -= len;
1505                         cache->space_info->bytes_pinned -= len;
1506                         spin_unlock(&cache->lock);
1507                         spin_unlock(&cache->space_info->lock);
1508                         fs_info->total_pinned -= len;
1509                 }
1510                 bytenr += len;
1511                 num -= len;
1512         }
1513         return 0;
1514 }
1515
1516 static int update_reserved_extents(struct btrfs_root *root,
1517                                    u64 bytenr, u64 num, int reserve)
1518 {
1519         u64 len;
1520         struct btrfs_block_group_cache *cache;
1521         struct btrfs_fs_info *fs_info = root->fs_info;
1522
1523         while (num > 0) {
1524                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1525                 BUG_ON(!cache);
1526                 len = min(num, cache->key.offset -
1527                           (bytenr - cache->key.objectid));
1528
1529                 spin_lock(&cache->space_info->lock);
1530                 spin_lock(&cache->lock);
1531                 if (reserve) {
1532                         cache->reserved += len;
1533                         cache->space_info->bytes_reserved += len;
1534                 } else {
1535                         cache->reserved -= len;
1536                         cache->space_info->bytes_reserved -= len;
1537                 }
1538                 spin_unlock(&cache->lock);
1539                 spin_unlock(&cache->space_info->lock);
1540                 bytenr += len;
1541                 num -= len;
1542         }
1543         return 0;
1544 }
1545
1546 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1547 {
1548         u64 last = 0;
1549         u64 start;
1550         u64 end;
1551         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1552         int ret;
1553
1554         mutex_lock(&root->fs_info->pinned_mutex);
1555         while(1) {
1556                 ret = find_first_extent_bit(pinned_extents, last,
1557                                             &start, &end, EXTENT_DIRTY);
1558                 if (ret)
1559                         break;
1560                 set_extent_dirty(copy, start, end, GFP_NOFS);
1561                 last = end + 1;
1562         }
1563         mutex_unlock(&root->fs_info->pinned_mutex);
1564         return 0;
1565 }
1566
1567 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1568                                struct btrfs_root *root,
1569                                struct extent_io_tree *unpin)
1570 {
1571         u64 start;
1572         u64 end;
1573         int ret;
1574         struct btrfs_block_group_cache *cache;
1575
1576         mutex_lock(&root->fs_info->pinned_mutex);
1577         while(1) {
1578                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1579                                             EXTENT_DIRTY);
1580                 if (ret)
1581                         break;
1582                 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
1583                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1584                 cache = btrfs_lookup_block_group(root->fs_info, start);
1585                 if (cache->cached)
1586                         btrfs_add_free_space(cache, start, end - start + 1);
1587                 if (need_resched()) {
1588                         mutex_unlock(&root->fs_info->pinned_mutex);
1589                         cond_resched();
1590                         mutex_lock(&root->fs_info->pinned_mutex);
1591                 }
1592         }
1593         mutex_unlock(&root->fs_info->pinned_mutex);
1594         return 0;
1595 }
1596
1597 static int finish_current_insert(struct btrfs_trans_handle *trans,
1598                                  struct btrfs_root *extent_root, int all)
1599 {
1600         u64 start;
1601         u64 end;
1602         u64 priv;
1603         u64 search = 0;
1604         struct btrfs_fs_info *info = extent_root->fs_info;
1605         struct btrfs_path *path;
1606         struct btrfs_extent_ref *ref;
1607         struct pending_extent_op *extent_op;
1608         struct btrfs_key key;
1609         struct btrfs_extent_item extent_item;
1610         int ret;
1611         int err = 0;
1612
1613         btrfs_set_stack_extent_refs(&extent_item, 1);
1614         path = btrfs_alloc_path();
1615
1616         while(1) {
1617                 mutex_lock(&info->extent_ins_mutex);
1618                 ret = find_first_extent_bit(&info->extent_ins, search, &start,
1619                                             &end, EXTENT_WRITEBACK);
1620                 if (ret) {
1621                         mutex_unlock(&info->extent_ins_mutex);
1622                         if (search && all) {
1623                                 search = 0;
1624                                 continue;
1625                         }
1626                         break;
1627                 }
1628
1629                 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
1630                 if (!ret) {
1631                         search = end + 1;
1632                         mutex_unlock(&info->extent_ins_mutex);
1633                         cond_resched();
1634                         continue;
1635                 }
1636                 BUG_ON(ret < 0);
1637
1638                 ret = get_state_private(&info->extent_ins, start, &priv);
1639                 BUG_ON(ret);
1640                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
1641
1642                 mutex_unlock(&info->extent_ins_mutex);
1643
1644                 if (extent_op->type == PENDING_EXTENT_INSERT) {
1645                         key.objectid = start;
1646                         key.offset = end + 1 - start;
1647                         key.type = BTRFS_EXTENT_ITEM_KEY;
1648                         err = btrfs_insert_item(trans, extent_root, &key,
1649                                         &extent_item, sizeof(extent_item));
1650                         BUG_ON(err);
1651
1652                         mutex_lock(&info->extent_ins_mutex);
1653                         clear_extent_bits(&info->extent_ins, start, end,
1654                                           EXTENT_WRITEBACK, GFP_NOFS);
1655                         mutex_unlock(&info->extent_ins_mutex);
1656
1657                         err = insert_extent_backref(trans, extent_root, path,
1658                                                 start, extent_op->parent,
1659                                                 extent_root->root_key.objectid,
1660                                                 extent_op->generation,
1661                                                 extent_op->level);
1662                         BUG_ON(err);
1663                 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
1664                         err = lookup_extent_backref(trans, extent_root, path,
1665                                                 start, extent_op->orig_parent,
1666                                                 extent_root->root_key.objectid,
1667                                                 extent_op->orig_generation,
1668                                                 extent_op->level, 0);
1669                         BUG_ON(err);
1670
1671                         mutex_lock(&info->extent_ins_mutex);
1672                         clear_extent_bits(&info->extent_ins, start, end,
1673                                           EXTENT_WRITEBACK, GFP_NOFS);
1674                         mutex_unlock(&info->extent_ins_mutex);
1675
1676                         key.objectid = start;
1677                         key.offset = extent_op->parent;
1678                         key.type = BTRFS_EXTENT_REF_KEY;
1679                         err = btrfs_set_item_key_safe(trans, extent_root, path,
1680                                                       &key);
1681                         BUG_ON(err);
1682                         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1683                                              struct btrfs_extent_ref);
1684                         btrfs_set_ref_generation(path->nodes[0], ref,
1685                                                  extent_op->generation);
1686                         btrfs_mark_buffer_dirty(path->nodes[0]);
1687                         btrfs_release_path(extent_root, path);
1688                 } else {
1689                         BUG_ON(1);
1690                 }
1691                 kfree(extent_op);
1692                 unlock_extent(&info->extent_ins, start, end, GFP_NOFS);
1693                 if (all)
1694                         search = 0;
1695                 else
1696                         search = end + 1;
1697
1698                 cond_resched();
1699         }
1700         btrfs_free_path(path);
1701         return 0;
1702 }
1703
1704 static int pin_down_bytes(struct btrfs_trans_handle *trans,
1705                           struct btrfs_root *root,
1706                           u64 bytenr, u64 num_bytes, int is_data)
1707 {
1708         int err = 0;
1709         struct extent_buffer *buf;
1710
1711         if (is_data)
1712                 goto pinit;
1713
1714         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1715         if (!buf)
1716                 goto pinit;
1717
1718         /* we can reuse a block if it hasn't been written
1719          * and it is from this transaction.  We can't
1720          * reuse anything from the tree log root because
1721          * it has tiny sub-transactions.
1722          */
1723         if (btrfs_buffer_uptodate(buf, 0) &&
1724             btrfs_try_tree_lock(buf)) {
1725                 u64 header_owner = btrfs_header_owner(buf);
1726                 u64 header_transid = btrfs_header_generation(buf);
1727                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
1728                     header_owner != BTRFS_TREE_RELOC_OBJECTID &&
1729                     header_transid == trans->transid &&
1730                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
1731                         clean_tree_block(NULL, root, buf);
1732                         btrfs_tree_unlock(buf);
1733                         free_extent_buffer(buf);
1734                         return 1;
1735                 }
1736                 btrfs_tree_unlock(buf);
1737         }
1738         free_extent_buffer(buf);
1739 pinit:
1740         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
1741
1742         BUG_ON(err < 0);
1743         return 0;
1744 }
1745
1746 /*
1747  * remove an extent from the root, returns 0 on success
1748  */
1749 static int __free_extent(struct btrfs_trans_handle *trans,
1750                          struct btrfs_root *root,
1751                          u64 bytenr, u64 num_bytes, u64 parent,
1752                          u64 root_objectid, u64 ref_generation,
1753                          u64 owner_objectid, int pin, int mark_free)
1754 {
1755         struct btrfs_path *path;
1756         struct btrfs_key key;
1757         struct btrfs_fs_info *info = root->fs_info;
1758         struct btrfs_root *extent_root = info->extent_root;
1759         struct extent_buffer *leaf;
1760         int ret;
1761         int extent_slot = 0;
1762         int found_extent = 0;
1763         int num_to_del = 1;
1764         struct btrfs_extent_item *ei;
1765         u32 refs;
1766
1767         key.objectid = bytenr;
1768         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1769         key.offset = num_bytes;
1770         path = btrfs_alloc_path();
1771         if (!path)
1772                 return -ENOMEM;
1773
1774         path->reada = 1;
1775         ret = lookup_extent_backref(trans, extent_root, path,
1776                                     bytenr, parent, root_objectid,
1777                                     ref_generation, owner_objectid, 1);
1778         if (ret == 0) {
1779                 struct btrfs_key found_key;
1780                 extent_slot = path->slots[0];
1781                 while(extent_slot > 0) {
1782                         extent_slot--;
1783                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1784                                               extent_slot);
1785                         if (found_key.objectid != bytenr)
1786                                 break;
1787                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1788                             found_key.offset == num_bytes) {
1789                                 found_extent = 1;
1790                                 break;
1791                         }
1792                         if (path->slots[0] - extent_slot > 5)
1793                                 break;
1794                 }
1795                 if (!found_extent) {
1796                         ret = remove_extent_backref(trans, extent_root, path);
1797                         BUG_ON(ret);
1798                         btrfs_release_path(extent_root, path);
1799                         ret = btrfs_search_slot(trans, extent_root,
1800                                                 &key, path, -1, 1);
1801                         BUG_ON(ret);
1802                         extent_slot = path->slots[0];
1803                 }
1804         } else {
1805                 btrfs_print_leaf(extent_root, path->nodes[0]);
1806                 WARN_ON(1);
1807                 printk("Unable to find ref byte nr %Lu root %Lu "
1808                        "gen %Lu owner %Lu\n", bytenr,
1809                        root_objectid, ref_generation, owner_objectid);
1810         }
1811
1812         leaf = path->nodes[0];
1813         ei = btrfs_item_ptr(leaf, extent_slot,
1814                             struct btrfs_extent_item);
1815         refs = btrfs_extent_refs(leaf, ei);
1816         BUG_ON(refs == 0);
1817         refs -= 1;
1818         btrfs_set_extent_refs(leaf, ei, refs);
1819
1820         btrfs_mark_buffer_dirty(leaf);
1821
1822         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1823                 struct btrfs_extent_ref *ref;
1824                 ref = btrfs_item_ptr(leaf, path->slots[0],
1825                                      struct btrfs_extent_ref);
1826                 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
1827                 /* if the back ref and the extent are next to each other
1828                  * they get deleted below in one shot
1829                  */
1830                 path->slots[0] = extent_slot;
1831                 num_to_del = 2;
1832         } else if (found_extent) {
1833                 /* otherwise delete the extent back ref */
1834                 ret = remove_extent_backref(trans, extent_root, path);
1835                 BUG_ON(ret);
1836                 /* if refs are 0, we need to setup the path for deletion */
1837                 if (refs == 0) {
1838                         btrfs_release_path(extent_root, path);
1839                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1840                                                 -1, 1);
1841                         BUG_ON(ret);
1842                 }
1843         }
1844
1845         if (refs == 0) {
1846                 u64 super_used;
1847                 u64 root_used;
1848 #ifdef BIO_RW_DISCARD
1849                 u64 map_length = num_bytes;
1850                 struct btrfs_multi_bio *multi = NULL;
1851 #endif
1852
1853                 if (pin) {
1854                         mutex_lock(&root->fs_info->pinned_mutex);
1855                         ret = pin_down_bytes(trans, root, bytenr, num_bytes,
1856                                 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
1857                         mutex_unlock(&root->fs_info->pinned_mutex);
1858                         if (ret > 0)
1859                                 mark_free = 1;
1860                         BUG_ON(ret < 0);
1861                 }
1862
1863                 /* block accounting for super block */
1864                 spin_lock_irq(&info->delalloc_lock);
1865                 super_used = btrfs_super_bytes_used(&info->super_copy);
1866                 btrfs_set_super_bytes_used(&info->super_copy,
1867                                            super_used - num_bytes);
1868                 spin_unlock_irq(&info->delalloc_lock);
1869
1870                 /* block accounting for root item */
1871                 root_used = btrfs_root_used(&root->root_item);
1872                 btrfs_set_root_used(&root->root_item,
1873                                            root_used - num_bytes);
1874                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1875                                       num_to_del);
1876                 BUG_ON(ret);
1877                 btrfs_release_path(extent_root, path);
1878                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1879                                          mark_free);
1880                 BUG_ON(ret);
1881
1882 #ifdef BIO_RW_DISCARD
1883                 /* Tell the block device(s) that the sectors can be discarded */
1884                 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
1885                                       bytenr, &map_length, &multi, 0);
1886                 if (!ret) {
1887                         struct btrfs_bio_stripe *stripe = multi->stripes;
1888                         int i;
1889
1890                         if (map_length > num_bytes)
1891                                 map_length = num_bytes;
1892
1893                         for (i = 0; i < multi->num_stripes; i++, stripe++) {
1894                                 blkdev_issue_discard(stripe->dev->bdev,
1895                                                      stripe->physical >> 9,
1896                                                      map_length >> 9);
1897                         }
1898                         kfree(multi);
1899                 }
1900 #endif
1901         }
1902         btrfs_free_path(path);
1903         finish_current_insert(trans, extent_root, 0);
1904         return ret;
1905 }
1906
1907 /*
1908  * find all the blocks marked as pending in the radix tree and remove
1909  * them from the extent map
1910  */
1911 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1912                                btrfs_root *extent_root, int all)
1913 {
1914         int ret;
1915         int err = 0;
1916         u64 start;
1917         u64 end;
1918         u64 priv;
1919         u64 search = 0;
1920         struct extent_io_tree *pending_del;
1921         struct extent_io_tree *extent_ins;
1922         struct pending_extent_op *extent_op;
1923         struct btrfs_fs_info *info = extent_root->fs_info;
1924
1925         extent_ins = &extent_root->fs_info->extent_ins;
1926         pending_del = &extent_root->fs_info->pending_del;
1927
1928         while(1) {
1929                 mutex_lock(&info->extent_ins_mutex);
1930                 ret = find_first_extent_bit(pending_del, search, &start, &end,
1931                                             EXTENT_WRITEBACK);
1932                 if (ret) {
1933                         mutex_unlock(&info->extent_ins_mutex);
1934                         if (all && search) {
1935                                 search = 0;
1936                                 continue;
1937                         }
1938                         break;
1939                 }
1940
1941                 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
1942                 if (!ret) {
1943                         search = end+1;
1944                         mutex_unlock(&info->extent_ins_mutex);
1945                         cond_resched();
1946                         continue;
1947                 }
1948                 BUG_ON(ret < 0);
1949
1950                 ret = get_state_private(pending_del, start, &priv);
1951                 BUG_ON(ret);
1952                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
1953
1954                 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK,
1955                                   GFP_NOFS);
1956                 if (!test_range_bit(extent_ins, start, end,
1957                                     EXTENT_WRITEBACK, 0)) {
1958                         mutex_unlock(&info->extent_ins_mutex);
1959 free_extent:
1960                         ret = __free_extent(trans, extent_root,
1961                                             start, end + 1 - start,
1962                                             extent_op->orig_parent,
1963                                             extent_root->root_key.objectid,
1964                                             extent_op->orig_generation,
1965                                             extent_op->level, 1, 0);
1966                         kfree(extent_op);
1967                 } else {
1968                         kfree(extent_op);
1969
1970                         ret = get_state_private(&info->extent_ins, start,
1971                                                 &priv);
1972                         BUG_ON(ret);
1973                         extent_op = (struct pending_extent_op *)
1974                                                 (unsigned long)priv;
1975
1976                         clear_extent_bits(&info->extent_ins, start, end,
1977                                           EXTENT_WRITEBACK, GFP_NOFS);
1978
1979                         mutex_unlock(&info->extent_ins_mutex);
1980
1981                         if (extent_op->type == PENDING_BACKREF_UPDATE)
1982                                 goto free_extent;
1983
1984                         mutex_lock(&extent_root->fs_info->pinned_mutex);
1985                         ret = pin_down_bytes(trans, extent_root, start,
1986                                              end + 1 - start, 0);
1987                         mutex_unlock(&extent_root->fs_info->pinned_mutex);
1988
1989                         ret = update_block_group(trans, extent_root, start,
1990                                                 end + 1 - start, 0, ret > 0);
1991
1992                         BUG_ON(ret);
1993                         kfree(extent_op);
1994                 }
1995                 if (ret)
1996                         err = ret;
1997                 unlock_extent(extent_ins, start, end, GFP_NOFS);
1998
1999                 if (all)
2000                         search = 0;
2001                 else
2002                         search = end + 1;
2003                 cond_resched();
2004         }
2005         return err;
2006 }
2007
2008 /*
2009  * remove an extent from the root, returns 0 on success
2010  */
2011 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2012                                struct btrfs_root *root,
2013                                u64 bytenr, u64 num_bytes, u64 parent,
2014                                u64 root_objectid, u64 ref_generation,
2015                                u64 owner_objectid, int pin)
2016 {
2017         struct btrfs_root *extent_root = root->fs_info->extent_root;
2018         int pending_ret;
2019         int ret;
2020
2021         WARN_ON(num_bytes < root->sectorsize);
2022         if (root == extent_root) {
2023                 struct pending_extent_op *extent_op;
2024
2025                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2026                 BUG_ON(!extent_op);
2027
2028                 extent_op->type = PENDING_EXTENT_DELETE;
2029                 extent_op->bytenr = bytenr;
2030                 extent_op->num_bytes = num_bytes;
2031                 extent_op->parent = parent;
2032                 extent_op->orig_parent = parent;
2033                 extent_op->generation = ref_generation;
2034                 extent_op->orig_generation = ref_generation;
2035                 extent_op->level = (int)owner_objectid;
2036
2037                 mutex_lock(&root->fs_info->extent_ins_mutex);
2038                 set_extent_bits(&root->fs_info->pending_del,
2039                                 bytenr, bytenr + num_bytes - 1,
2040                                 EXTENT_WRITEBACK, GFP_NOFS);
2041                 set_state_private(&root->fs_info->pending_del,
2042                                   bytenr, (unsigned long)extent_op);
2043                 mutex_unlock(&root->fs_info->extent_ins_mutex);
2044                 return 0;
2045         }
2046         /* if metadata always pin */
2047         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2048                 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2049                         struct btrfs_block_group_cache *cache;
2050
2051                         /* btrfs_free_reserved_extent */
2052                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
2053                         BUG_ON(!cache);
2054                         btrfs_add_free_space(cache, bytenr, num_bytes);
2055                         update_reserved_extents(root, bytenr, num_bytes, 0);
2056                         return 0;
2057                 }
2058                 pin = 1;
2059         }
2060
2061         /* if data pin when any transaction has committed this */
2062         if (ref_generation != trans->transid)
2063                 pin = 1;
2064
2065         ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2066                             root_objectid, ref_generation,
2067                             owner_objectid, pin, pin == 0);
2068
2069         finish_current_insert(trans, root->fs_info->extent_root, 0);
2070         pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0);
2071         return ret ? ret : pending_ret;
2072 }
2073
2074 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2075                       struct btrfs_root *root,
2076                       u64 bytenr, u64 num_bytes, u64 parent,
2077                       u64 root_objectid, u64 ref_generation,
2078                       u64 owner_objectid, int pin)
2079 {
2080         int ret;
2081
2082         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
2083                                   root_objectid, ref_generation,
2084                                   owner_objectid, pin);
2085         return ret;
2086 }
2087
2088 static u64 stripe_align(struct btrfs_root *root, u64 val)
2089 {
2090         u64 mask = ((u64)root->stripesize - 1);
2091         u64 ret = (val + mask) & ~mask;
2092         return ret;
2093 }
2094
2095 /*
2096  * walks the btree of allocated extents and find a hole of a given size.
2097  * The key ins is changed to record the hole:
2098  * ins->objectid == block start
2099  * ins->flags = BTRFS_EXTENT_ITEM_KEY
2100  * ins->offset == number of blocks
2101  * Any available blocks before search_start are skipped.
2102  */
2103 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2104                                      struct btrfs_root *orig_root,
2105                                      u64 num_bytes, u64 empty_size,
2106                                      u64 search_start, u64 search_end,
2107                                      u64 hint_byte, struct btrfs_key *ins,
2108                                      u64 exclude_start, u64 exclude_nr,
2109                                      int data)
2110 {
2111         int ret = 0;
2112         struct btrfs_root * root = orig_root->fs_info->extent_root;
2113         u64 total_needed = num_bytes;
2114         u64 *last_ptr = NULL;
2115         struct btrfs_block_group_cache *block_group = NULL;
2116         int chunk_alloc_done = 0;
2117         int empty_cluster = 2 * 1024 * 1024;
2118         int allowed_chunk_alloc = 0;
2119         struct list_head *head = NULL, *cur = NULL;
2120         int loop = 0;
2121         struct btrfs_space_info *space_info;
2122
2123         WARN_ON(num_bytes < root->sectorsize);
2124         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2125         ins->objectid = 0;
2126         ins->offset = 0;
2127
2128         if (orig_root->ref_cows || empty_size)
2129                 allowed_chunk_alloc = 1;
2130
2131         if (data & BTRFS_BLOCK_GROUP_METADATA) {
2132                 last_ptr = &root->fs_info->last_alloc;
2133                 empty_cluster = 256 * 1024;
2134         }
2135
2136         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
2137                 last_ptr = &root->fs_info->last_data_alloc;
2138
2139         if (last_ptr) {
2140                 if (*last_ptr)
2141                         hint_byte = *last_ptr;
2142                 else
2143                         empty_size += empty_cluster;
2144         }
2145         search_start = max(search_start, first_logical_byte(root, 0));
2146         search_start = max(search_start, hint_byte);
2147         total_needed += empty_size;
2148
2149         block_group = btrfs_lookup_block_group(root->fs_info, search_start);
2150         space_info = __find_space_info(root->fs_info, data);
2151
2152         down_read(&space_info->groups_sem);
2153         while (1) {
2154                 struct btrfs_free_space *free_space;
2155                 /*
2156                  * the only way this happens if our hint points to a block
2157                  * group thats not of the proper type, while looping this
2158                  * should never happen
2159                  */
2160                 WARN_ON(!block_group);
2161                 mutex_lock(&block_group->alloc_mutex);
2162                 if (unlikely(!block_group_bits(block_group, data)))
2163                         goto new_group;
2164
2165                 ret = cache_block_group(root, block_group);
2166                 if (ret) {
2167                         mutex_unlock(&block_group->alloc_mutex);
2168                         break;
2169                 }
2170
2171                 if (block_group->ro)
2172                         goto new_group;
2173
2174                 free_space = btrfs_find_free_space(block_group, search_start,
2175                                                    total_needed);
2176                 if (free_space) {
2177                         u64 start = block_group->key.objectid;
2178                         u64 end = block_group->key.objectid +
2179                                 block_group->key.offset;
2180
2181                         search_start = stripe_align(root, free_space->offset);
2182
2183                         /* move on to the next group */
2184                         if (search_start + num_bytes >= search_end)
2185                                 goto new_group;
2186
2187                         /* move on to the next group */
2188                         if (search_start + num_bytes > end)
2189                                 goto new_group;
2190
2191                         if (exclude_nr > 0 &&
2192                             (search_start + num_bytes > exclude_start &&
2193                              search_start < exclude_start + exclude_nr)) {
2194                                 search_start = exclude_start + exclude_nr;
2195                                 /*
2196                                  * if search_start is still in this block group
2197                                  * then we just re-search this block group
2198                                  */
2199                                 if (search_start >= start &&
2200                                     search_start < end) {
2201                                         mutex_unlock(&block_group->alloc_mutex);
2202                                         continue;
2203                                 }
2204
2205                                 /* else we go to the next block group */
2206                                 goto new_group;
2207                         }
2208
2209                         ins->objectid = search_start;
2210                         ins->offset = num_bytes;
2211
2212                         btrfs_remove_free_space_lock(block_group, search_start,
2213                                                      num_bytes);
2214                         /* we are all good, lets return */
2215                         mutex_unlock(&block_group->alloc_mutex);
2216                         break;
2217                 }
2218 new_group:
2219                 mutex_unlock(&block_group->alloc_mutex);
2220                 /*
2221                  * Here's how this works.
2222                  * loop == 0: we were searching a block group via a hint
2223                  *              and didn't find anything, so we start at
2224                  *              the head of the block groups and keep searching
2225                  * loop == 1: we're searching through all of the block groups
2226                  *              if we hit the head again we have searched
2227                  *              all of the block groups for this space and we
2228                  *              need to try and allocate, if we cant error out.
2229                  * loop == 2: we allocated more space and are looping through
2230                  *              all of the block groups again.
2231                  */
2232                 if (loop == 0) {
2233                         head = &space_info->block_groups;
2234                         cur = head->next;
2235
2236                         if (last_ptr && *last_ptr) {
2237                                 total_needed += empty_cluster;
2238                                 *last_ptr = 0;
2239                         }
2240                         loop++;
2241                 } else if (loop == 1 && cur == head) {
2242                         if (allowed_chunk_alloc && !chunk_alloc_done) {
2243                                 up_read(&space_info->groups_sem);
2244                                 ret = do_chunk_alloc(trans, root, num_bytes +
2245                                                      2 * 1024 * 1024, data, 1);
2246                                 if (ret < 0)
2247                                         break;
2248                                 down_read(&space_info->groups_sem);
2249                                 loop++;
2250                                 head = &space_info->block_groups;
2251                                 cur = head->next;
2252                                 chunk_alloc_done = 1;
2253                         } else if (!allowed_chunk_alloc) {
2254                                 space_info->force_alloc = 1;
2255                                 break;
2256                         } else {
2257                                 break;
2258                         }
2259                 } else if (cur == head) {
2260                         break;
2261                 }
2262
2263                 block_group = list_entry(cur, struct btrfs_block_group_cache,
2264                                          list);
2265                 search_start = block_group->key.objectid;
2266                 cur = cur->next;
2267         }
2268
2269         /* we found what we needed */
2270         if (ins->objectid) {
2271                 if (!(data & BTRFS_BLOCK_GROUP_DATA))
2272                         trans->block_group = block_group;
2273
2274                 if (last_ptr)
2275                         *last_ptr = ins->objectid + ins->offset;
2276                 ret = 0;
2277         } else if (!ret) {
2278                 ret = -ENOSPC;
2279         }
2280
2281         up_read(&space_info->groups_sem);
2282         return ret;
2283 }
2284
2285 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2286 {
2287         struct btrfs_block_group_cache *cache;
2288         struct list_head *l;
2289
2290         printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
2291                info->total_bytes - info->bytes_used - info->bytes_pinned -
2292                info->bytes_reserved, (info->full) ? "" : "not ");
2293
2294         down_read(&info->groups_sem);
2295         list_for_each(l, &info->block_groups) {
2296                 cache = list_entry(l, struct btrfs_block_group_cache, list);
2297                 spin_lock(&cache->lock);
2298                 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
2299                        "%Lu pinned %Lu reserved\n",
2300                        cache->key.objectid, cache->key.offset,
2301                        btrfs_block_group_used(&cache->item),
2302                        cache->pinned, cache->reserved);
2303                 btrfs_dump_free_space(cache, bytes);
2304                 spin_unlock(&cache->lock);
2305         }
2306         up_read(&info->groups_sem);
2307 }
2308
2309 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2310                                   struct btrfs_root *root,
2311                                   u64 num_bytes, u64 min_alloc_size,
2312                                   u64 empty_size, u64 hint_byte,
2313                                   u64 search_end, struct btrfs_key *ins,
2314                                   u64 data)
2315 {
2316         int ret;
2317         u64 search_start = 0;
2318         u64 alloc_profile;
2319         struct btrfs_fs_info *info = root->fs_info;
2320
2321         if (data) {
2322                 alloc_profile = info->avail_data_alloc_bits &
2323                                 info->data_alloc_profile;
2324                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2325         } else if (root == root->fs_info->chunk_root) {
2326                 alloc_profile = info->avail_system_alloc_bits &
2327                                 info->system_alloc_profile;
2328                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2329         } else {
2330                 alloc_profile = info->avail_metadata_alloc_bits &
2331                                 info->metadata_alloc_profile;
2332                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2333         }
2334 again:
2335         data = reduce_alloc_profile(root, data);
2336         /*
2337          * the only place that sets empty_size is btrfs_realloc_node, which
2338          * is not called recursively on allocations
2339          */
2340         if (empty_size || root->ref_cows) {
2341                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
2342                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2343                                      2 * 1024 * 1024,
2344                                      BTRFS_BLOCK_GROUP_METADATA |
2345                                      (info->metadata_alloc_profile &
2346                                       info->avail_metadata_alloc_bits), 0);
2347                 }
2348                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2349                                      num_bytes + 2 * 1024 * 1024, data, 0);
2350         }
2351
2352         WARN_ON(num_bytes < root->sectorsize);
2353         ret = find_free_extent(trans, root, num_bytes, empty_size,
2354                                search_start, search_end, hint_byte, ins,
2355                                trans->alloc_exclude_start,
2356                                trans->alloc_exclude_nr, data);
2357
2358         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
2359                 num_bytes = num_bytes >> 1;
2360                 num_bytes = num_bytes & ~(root->sectorsize - 1);
2361                 num_bytes = max(num_bytes, min_alloc_size);
2362                 do_chunk_alloc(trans, root->fs_info->extent_root,
2363                                num_bytes, data, 1);
2364                 goto again;
2365         }
2366         if (ret) {
2367                 struct btrfs_space_info *sinfo;
2368
2369                 sinfo = __find_space_info(root->fs_info, data);
2370                 printk("allocation failed flags %Lu, wanted %Lu\n",
2371                        data, num_bytes);
2372                 dump_space_info(sinfo, num_bytes);
2373                 BUG();
2374         }
2375
2376         return ret;
2377 }
2378
2379 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
2380 {
2381         struct btrfs_block_group_cache *cache;
2382
2383         cache = btrfs_lookup_block_group(root->fs_info, start);
2384         if (!cache) {
2385                 printk(KERN_ERR "Unable to find block group for %Lu\n", start);
2386                 return -ENOSPC;
2387         }
2388         btrfs_add_free_space(cache, start, len);
2389         update_reserved_extents(root, start, len, 0);
2390         return 0;
2391 }
2392
2393 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2394                                   struct btrfs_root *root,
2395                                   u64 num_bytes, u64 min_alloc_size,
2396                                   u64 empty_size, u64 hint_byte,
2397                                   u64 search_end, struct btrfs_key *ins,
2398                                   u64 data)
2399 {
2400         int ret;
2401         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2402                                      empty_size, hint_byte, search_end, ins,
2403                                      data);
2404         update_reserved_extents(root, ins->objectid, ins->offset, 1);
2405         return ret;
2406 }
2407
2408 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2409                                          struct btrfs_root *root, u64 parent,
2410                                          u64 root_objectid, u64 ref_generation,
2411                                          u64 owner, struct btrfs_key *ins)
2412 {
2413         int ret;
2414         int pending_ret;
2415         u64 super_used;
2416         u64 root_used;
2417         u64 num_bytes = ins->offset;
2418         u32 sizes[2];
2419         struct btrfs_fs_info *info = root->fs_info;
2420         struct btrfs_root *extent_root = info->extent_root;
2421         struct btrfs_extent_item *extent_item;
2422         struct btrfs_extent_ref *ref;
2423         struct btrfs_path *path;
2424         struct btrfs_key keys[2];
2425
2426         if (parent == 0)
2427                 parent = ins->objectid;
2428
2429         /* block accounting for super block */
2430         spin_lock_irq(&info->delalloc_lock);
2431         super_used = btrfs_super_bytes_used(&info->super_copy);
2432         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2433         spin_unlock_irq(&info->delalloc_lock);
2434
2435         /* block accounting for root item */
2436         root_used = btrfs_root_used(&root->root_item);
2437         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
2438
2439         if (root == extent_root) {
2440                 struct pending_extent_op *extent_op;
2441
2442                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2443                 BUG_ON(!extent_op);
2444
2445                 extent_op->type = PENDING_EXTENT_INSERT;
2446                 extent_op->bytenr = ins->objectid;
2447                 extent_op->num_bytes = ins->offset;
2448                 extent_op->parent = parent;
2449                 extent_op->orig_parent = 0;
2450                 extent_op->generation = ref_generation;
2451                 extent_op->orig_generation = 0;
2452                 extent_op->level = (int)owner;
2453
2454                 mutex_lock(&root->fs_info->extent_ins_mutex);
2455                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2456                                 ins->objectid + ins->offset - 1,
2457                                 EXTENT_WRITEBACK, GFP_NOFS);
2458                 set_state_private(&root->fs_info->extent_ins,
2459                                   ins->objectid, (unsigned long)extent_op);
2460                 mutex_unlock(&root->fs_info->extent_ins_mutex);
2461                 goto update_block;
2462         }
2463
2464         memcpy(&keys[0], ins, sizeof(*ins));
2465         keys[1].objectid = ins->objectid;
2466         keys[1].type = BTRFS_EXTENT_REF_KEY;
2467         keys[1].offset = parent;
2468         sizes[0] = sizeof(*extent_item);
2469         sizes[1] = sizeof(*ref);
2470
2471         path = btrfs_alloc_path();
2472         BUG_ON(!path);
2473
2474         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2475                                        sizes, 2);
2476         BUG_ON(ret);
2477
2478         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2479                                      struct btrfs_extent_item);
2480         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2481         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2482                              struct btrfs_extent_ref);
2483
2484         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2485         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2486         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2487         btrfs_set_ref_num_refs(path->nodes[0], ref, 1);
2488
2489         btrfs_mark_buffer_dirty(path->nodes[0]);
2490
2491         trans->alloc_exclude_start = 0;
2492         trans->alloc_exclude_nr = 0;
2493         btrfs_free_path(path);
2494         finish_current_insert(trans, extent_root, 0);
2495         pending_ret = del_pending_extents(trans, extent_root, 0);
2496
2497         if (ret)
2498                 goto out;
2499         if (pending_ret) {
2500                 ret = pending_ret;
2501                 goto out;
2502         }
2503
2504 update_block:
2505         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
2506         if (ret) {
2507                 printk("update block group failed for %Lu %Lu\n",
2508                        ins->objectid, ins->offset);
2509                 BUG();
2510         }
2511 out:
2512         return ret;
2513 }
2514
2515 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2516                                 struct btrfs_root *root, u64 parent,
2517                                 u64 root_objectid, u64 ref_generation,
2518                                 u64 owner, struct btrfs_key *ins)
2519 {
2520         int ret;
2521
2522         if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
2523                 return 0;
2524         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
2525                                             ref_generation, owner, ins);
2526         update_reserved_extents(root, ins->objectid, ins->offset, 0);
2527         return ret;
2528 }
2529
2530 /*
2531  * this is used by the tree logging recovery code.  It records that
2532  * an extent has been allocated and makes sure to clear the free
2533  * space cache bits as well
2534  */
2535 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
2536                                 struct btrfs_root *root, u64 parent,
2537                                 u64 root_objectid, u64 ref_generation,
2538                                 u64 owner, struct btrfs_key *ins)
2539 {
2540         int ret;
2541         struct btrfs_block_group_cache *block_group;
2542
2543         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
2544         mutex_lock(&block_group->alloc_mutex);
2545         cache_block_group(root, block_group);
2546
2547         ret = btrfs_remove_free_space_lock(block_group, ins->objectid,
2548                                            ins->offset);
2549         mutex_unlock(&block_group->alloc_mutex);
2550         BUG_ON(ret);
2551         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
2552                                             ref_generation, owner, ins);
2553         return ret;
2554 }
2555
2556 /*
2557  * finds a free extent and does all the dirty work required for allocation
2558  * returns the key for the extent through ins, and a tree buffer for
2559  * the first block of the extent through buf.
2560  *
2561  * returns 0 if everything worked, non-zero otherwise.
2562  */
2563 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2564                        struct btrfs_root *root,
2565                        u64 num_bytes, u64 parent, u64 min_alloc_size,
2566                        u64 root_objectid, u64 ref_generation,
2567                        u64 owner_objectid, u64 empty_size, u64 hint_byte,
2568                        u64 search_end, struct btrfs_key *ins, u64 data)
2569 {
2570         int ret;
2571
2572         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2573                                      min_alloc_size, empty_size, hint_byte,
2574                                      search_end, ins, data);
2575         BUG_ON(ret);
2576         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
2577                 ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2578                                         root_objectid, ref_generation,
2579                                         owner_objectid, ins);
2580                 BUG_ON(ret);
2581
2582         } else {
2583                 update_reserved_extents(root, ins->objectid, ins->offset, 1);
2584         }
2585         return ret;
2586 }
2587
2588 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
2589                                             struct btrfs_root *root,
2590                                             u64 bytenr, u32 blocksize)
2591 {
2592         struct extent_buffer *buf;
2593
2594         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
2595         if (!buf)
2596                 return ERR_PTR(-ENOMEM);
2597         btrfs_set_header_generation(buf, trans->transid);
2598         btrfs_tree_lock(buf);
2599         clean_tree_block(trans, root, buf);
2600         btrfs_set_buffer_uptodate(buf);
2601         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2602                 set_extent_dirty(&root->dirty_log_pages, buf->start,
2603                          buf->start + buf->len - 1, GFP_NOFS);
2604         } else {
2605                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2606                          buf->start + buf->len - 1, GFP_NOFS);
2607         }
2608         trans->blocks_used++;
2609         return buf;
2610 }
2611
2612 /*
2613  * helper function to allocate a block for a given tree
2614  * returns the tree buffer or NULL.
2615  */
2616 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2617                                              struct btrfs_root *root,
2618                                              u32 blocksize, u64 parent,
2619                                              u64 root_objectid,
2620                                              u64 ref_generation,
2621                                              int level,
2622                                              u64 hint,
2623                                              u64 empty_size)
2624 {
2625         struct btrfs_key ins;
2626         int ret;
2627         struct extent_buffer *buf;
2628
2629         ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
2630                                  root_objectid, ref_generation, level,
2631                                  empty_size, hint, (u64)-1, &ins, 0);
2632         if (ret) {
2633                 BUG_ON(ret > 0);
2634                 return ERR_PTR(ret);
2635         }
2636
2637         buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
2638         return buf;
2639 }
2640
2641 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
2642                         struct btrfs_root *root, struct extent_buffer *leaf)
2643 {
2644         u64 leaf_owner;
2645         u64 leaf_generation;
2646         struct btrfs_key key;
2647         struct btrfs_file_extent_item *fi;
2648         int i;
2649         int nritems;
2650         int ret;
2651
2652         BUG_ON(!btrfs_is_leaf(leaf));
2653         nritems = btrfs_header_nritems(leaf);
2654         leaf_owner = btrfs_header_owner(leaf);
2655         leaf_generation = btrfs_header_generation(leaf);
2656
2657         for (i = 0; i < nritems; i++) {
2658                 u64 disk_bytenr;
2659                 cond_resched();
2660
2661                 btrfs_item_key_to_cpu(leaf, &key, i);
2662                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2663                         continue;
2664                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2665                 if (btrfs_file_extent_type(leaf, fi) ==
2666                     BTRFS_FILE_EXTENT_INLINE)
2667                         continue;
2668                 /*
2669                  * FIXME make sure to insert a trans record that
2670                  * repeats the snapshot del on crash
2671                  */
2672                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2673                 if (disk_bytenr == 0)
2674                         continue;
2675
2676                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2677                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2678                                 leaf->start, leaf_owner, leaf_generation,
2679                                 key.objectid, 0);
2680                 BUG_ON(ret);
2681
2682                 atomic_inc(&root->fs_info->throttle_gen);
2683                 wake_up(&root->fs_info->transaction_throttle);
2684                 cond_resched();
2685         }
2686         return 0;
2687 }
2688
2689 static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
2690                                         struct btrfs_root *root,
2691                                         struct btrfs_leaf_ref *ref)
2692 {
2693         int i;
2694         int ret;
2695         struct btrfs_extent_info *info = ref->extents;
2696
2697         for (i = 0; i < ref->nritems; i++) {
2698                 ret = __btrfs_free_extent(trans, root, info->bytenr,
2699                                           info->num_bytes, ref->bytenr,
2700                                           ref->owner, ref->generation,
2701                                           info->objectid, 0);
2702
2703                 atomic_inc(&root->fs_info->throttle_gen);
2704                 wake_up(&root->fs_info->transaction_throttle);
2705                 cond_resched();
2706
2707                 BUG_ON(ret);
2708                 info++;
2709         }
2710
2711         return 0;
2712 }
2713
2714 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2715                               u32 *refs)
2716 {
2717         int ret;
2718
2719         ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
2720         BUG_ON(ret);
2721
2722 #if 0 // some debugging code in case we see problems here
2723         /* if the refs count is one, it won't get increased again.  But
2724          * if the ref count is > 1, someone may be decreasing it at
2725          * the same time we are.
2726          */
2727         if (*refs != 1) {
2728                 struct extent_buffer *eb = NULL;
2729                 eb = btrfs_find_create_tree_block(root, start, len);
2730                 if (eb)
2731                         btrfs_tree_lock(eb);
2732
2733                 mutex_lock(&root->fs_info->alloc_mutex);
2734                 ret = lookup_extent_ref(NULL, root, start, len, refs);
2735                 BUG_ON(ret);
2736                 mutex_unlock(&root->fs_info->alloc_mutex);
2737
2738                 if (eb) {
2739                         btrfs_tree_unlock(eb);
2740                         free_extent_buffer(eb);
2741                 }
2742                 if (*refs == 1) {
2743                         printk("block %llu went down to one during drop_snap\n",
2744                                (unsigned long long)start);
2745                 }
2746
2747         }
2748 #endif
2749
2750         cond_resched();
2751         return ret;
2752 }
2753
2754 /*
2755  * helper function for drop_snapshot, this walks down the tree dropping ref
2756  * counts as it goes.
2757  */
2758 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2759                                    struct btrfs_root *root,
2760                                    struct btrfs_path *path, int *level)
2761 {
2762         u64 root_owner;
2763         u64 root_gen;
2764         u64 bytenr;
2765         u64 ptr_gen;
2766         struct extent_buffer *next;
2767         struct extent_buffer *cur;
2768         struct extent_buffer *parent;
2769         struct btrfs_leaf_ref *ref;
2770         u32 blocksize;
2771         int ret;
2772         u32 refs;
2773
2774         WARN_ON(*level < 0);
2775         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2776         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
2777                                 path->nodes[*level]->len, &refs);
2778         BUG_ON(ret);
2779         if (refs > 1)
2780                 goto out;
2781
2782         /*
2783          * walk down to the last node level and free all the leaves
2784          */
2785         while(*level >= 0) {
2786                 WARN_ON(*level < 0);
2787                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2788                 cur = path->nodes[*level];
2789
2790                 if (btrfs_header_level(cur) != *level)
2791                         WARN_ON(1);
2792
2793                 if (path->slots[*level] >=
2794                     btrfs_header_nritems(cur))
2795                         break;
2796                 if (*level == 0) {
2797                         ret = btrfs_drop_leaf_ref(trans, root, cur);
2798                         BUG_ON(ret);
2799                         break;
2800                 }
2801                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2802                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2803                 blocksize = btrfs_level_size(root, *level - 1);
2804
2805                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
2806                 BUG_ON(ret);
2807                 if (refs != 1) {
2808                         parent = path->nodes[*level];
2809                         root_owner = btrfs_header_owner(parent);
2810                         root_gen = btrfs_header_generation(parent);
2811                         path->slots[*level]++;
2812
2813                         ret = __btrfs_free_extent(trans, root, bytenr,
2814                                                 blocksize, parent->start,
2815                                                 root_owner, root_gen,
2816                                                 *level - 1, 1);
2817                         BUG_ON(ret);
2818
2819                         atomic_inc(&root->fs_info->throttle_gen);
2820                         wake_up(&root->fs_info->transaction_throttle);
2821                         cond_resched();
2822
2823                         continue;
2824                 }
2825                 /*
2826                  * at this point, we have a single ref, and since the
2827                  * only place referencing this extent is a dead root
2828                  * the reference count should never go higher.
2829                  * So, we don't need to check it again
2830                  */
2831                 if (*level == 1) {
2832                         ref = btrfs_lookup_leaf_ref(root, bytenr);
2833                         if (ref && ref->generation != ptr_gen) {
2834                                 btrfs_free_leaf_ref(root, ref);
2835                                 ref = NULL;
2836                         }
2837                         if (ref) {
2838                                 ret = cache_drop_leaf_ref(trans, root, ref);
2839                                 BUG_ON(ret);
2840                                 btrfs_remove_leaf_ref(root, ref);
2841                                 btrfs_free_leaf_ref(root, ref);
2842                                 *level = 0;
2843                                 break;
2844                         }
2845                         if (printk_ratelimit()) {
2846                                 printk("leaf ref miss for bytenr %llu\n",
2847                                        (unsigned long long)bytenr);
2848                         }
2849                 }
2850                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2851                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2852                         free_extent_buffer(next);
2853
2854                         next = read_tree_block(root, bytenr, blocksize,
2855                                                ptr_gen);
2856                         cond_resched();
2857 #if 0
2858                         /*
2859                          * this is a debugging check and can go away
2860                          * the ref should never go all the way down to 1
2861                          * at this point
2862                          */
2863                         ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2864                                                 &refs);
2865                         BUG_ON(ret);
2866                         WARN_ON(refs != 1);
2867 #endif
2868                 }
2869                 WARN_ON(*level <= 0);
2870                 if (path->nodes[*level-1])
2871                         free_extent_buffer(path->nodes[*level-1]);
2872                 path->nodes[*level-1] = next;
2873                 *level = btrfs_header_level(next);
2874                 path->slots[*level] = 0;
2875                 cond_resched();
2876         }
2877 out:
2878         WARN_ON(*level < 0);
2879         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2880
2881         if (path->nodes[*level] == root->node) {
2882                 parent = path->nodes[*level];
2883                 bytenr = path->nodes[*level]->start;
2884         } else {
2885                 parent = path->nodes[*level + 1];
2886                 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
2887         }
2888
2889         blocksize = btrfs_level_size(root, *level);
2890         root_owner = btrfs_header_owner(parent);
2891         root_gen = btrfs_header_generation(parent);
2892
2893         ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
2894                                   parent->start, root_owner, root_gen,
2895                                   *level, 1);
2896         free_extent_buffer(path->nodes[*level]);
2897         path->nodes[*level] = NULL;
2898         *level += 1;
2899         BUG_ON(ret);
2900
2901         cond_resched();
2902         return 0;
2903 }
2904
2905 /*
2906  * helper function for drop_subtree, this function is similar to
2907  * walk_down_tree. The main difference is that it checks reference
2908  * counts while tree blocks are locked.
2909  */
2910 static int noinline walk_down_subtree(struct btrfs_trans_handle *trans,
2911                                       struct btrfs_root *root,
2912                                       struct btrfs_path *path, int *level)
2913 {
2914         struct extent_buffer *next;
2915         struct extent_buffer *cur;
2916         struct extent_buffer *parent;
2917         u64 bytenr;
2918         u64 ptr_gen;
2919         u32 blocksize;
2920         u32 refs;
2921         int ret;
2922
2923         cur = path->nodes[*level];
2924         ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
2925                                       &refs);
2926         BUG_ON(ret);
2927         if (refs > 1)
2928                 goto out;
2929
2930         while (*level >= 0) {
2931                 cur = path->nodes[*level];
2932                 if (*level == 0) {
2933                         ret = btrfs_drop_leaf_ref(trans, root, cur);
2934                         BUG_ON(ret);
2935                         clean_tree_block(trans, root, cur);
2936                         break;
2937                 }
2938                 if (path->slots[*level] >= btrfs_header_nritems(cur)) {
2939                         clean_tree_block(trans, root, cur);
2940                         break;
2941                 }
2942
2943                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2944                 blocksize = btrfs_level_size(root, *level - 1);
2945                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2946
2947                 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
2948                 btrfs_tree_lock(next);
2949
2950                 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
2951                                               &refs);
2952                 BUG_ON(ret);
2953                 if (refs > 1) {
2954                         parent = path->nodes[*level];
2955                         ret = btrfs_free_extent(trans, root, bytenr,
2956                                         blocksize, parent->start,
2957                                         btrfs_header_owner(parent),
2958                                         btrfs_header_generation(parent),
2959                                         *level - 1, 1);
2960                         BUG_ON(ret);
2961                         path->slots[*level]++;
2962                         btrfs_tree_unlock(next);
2963                         free_extent_buffer(next);
2964                         continue;
2965                 }
2966
2967                 *level = btrfs_header_level(next);
2968                 path->nodes[*level] = next;
2969                 path->slots[*level] = 0;
2970                 path->locks[*level] = 1;
2971                 cond_resched();
2972         }
2973 out:
2974         parent = path->nodes[*level + 1];
2975         bytenr = path->nodes[*level]->start;
2976         blocksize = path->nodes[*level]->len;
2977
2978         ret = btrfs_free_extent(trans, root, bytenr, blocksize,
2979                         parent->start, btrfs_header_owner(parent),
2980                         btrfs_header_generation(parent), *level, 1);
2981         BUG_ON(ret);
2982
2983         if (path->locks[*level]) {
2984                 btrfs_tree_unlock(path->nodes[*level]);
2985                 path->locks[*level] = 0;
2986         }
2987         free_extent_buffer(path->nodes[*level]);
2988         path->nodes[*level] = NULL;
2989         *level += 1;
2990         cond_resched();
2991         return 0;
2992 }
2993
2994 /*
2995  * helper for dropping snapshots.  This walks back up the tree in the path
2996  * to find the first node higher up where we haven't yet gone through
2997  * all the slots
2998  */
2999 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3000                                  struct btrfs_root *root,
3001                                  struct btrfs_path *path,
3002                                  int *level, int max_level)
3003 {
3004         u64 root_owner;
3005         u64 root_gen;
3006         struct btrfs_root_item *root_item = &root->root_item;
3007         int i;
3008         int slot;
3009         int ret;
3010
3011         for (i = *level; i < max_level && path->nodes[i]; i++) {
3012                 slot = path->slots[i];
3013                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3014                         struct extent_buffer *node;
3015                         struct btrfs_disk_key disk_key;
3016                         node = path->nodes[i];
3017                         path->slots[i]++;
3018                         *level = i;
3019                         WARN_ON(*level == 0);
3020                         btrfs_node_key(node, &disk_key, path->slots[i]);
3021                         memcpy(&root_item->drop_progress,
3022                                &disk_key, sizeof(disk_key));
3023                         root_item->drop_level = i;
3024                         return 0;
3025                 } else {
3026                         struct extent_buffer *parent;
3027                         if (path->nodes[*level] == root->node)
3028                                 parent = path->nodes[*level];
3029                         else
3030                                 parent = path->nodes[*level + 1];
3031
3032                         root_owner = btrfs_header_owner(parent);
3033                         root_gen = btrfs_header_generation(parent);
3034
3035                         clean_tree_block(trans, root, path->nodes[*level]);
3036                         ret = btrfs_free_extent(trans, root,
3037                                                 path->nodes[*level]->start,
3038                                                 path->nodes[*level]->len,
3039                                                 parent->start, root_owner,
3040                                                 root_gen, *level, 1);
3041                         BUG_ON(ret);
3042                         if (path->locks[*level]) {
3043                                 btrfs_tree_unlock(path->nodes[*level]);
3044                                 path->locks[*level] = 0;
3045                         }
3046                         free_extent_buffer(path->nodes[*level]);
3047                         path->nodes[*level] = NULL;
3048                         *level = i + 1;
3049                 }
3050         }
3051         return 1;
3052 }
3053
3054 /*
3055  * drop the reference count on the tree rooted at 'snap'.  This traverses
3056  * the tree freeing any blocks that have a ref count of zero after being
3057  * decremented.
3058  */
3059 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3060                         *root)
3061 {
3062         int ret = 0;
3063         int wret;
3064         int level;
3065         struct btrfs_path *path;
3066         int i;
3067         int orig_level;
3068         struct btrfs_root_item *root_item = &root->root_item;
3069
3070         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3071         path = btrfs_alloc_path();
3072         BUG_ON(!path);
3073
3074         level = btrfs_header_level(root->node);
3075         orig_level = level;
3076         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3077                 path->nodes[level] = root->node;
3078                 extent_buffer_get(root->node);
3079                 path->slots[level] = 0;
3080         } else {
3081                 struct btrfs_key key;
3082                 struct btrfs_disk_key found_key;
3083                 struct extent_buffer *node;
3084
3085                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3086                 level = root_item->drop_level;
3087                 path->lowest_level = level;
3088                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3089                 if (wret < 0) {
3090                         ret = wret;
3091                         goto out;
3092                 }
3093                 node = path->nodes[level];
3094                 btrfs_node_key(node, &found_key, path->slots[level]);
3095                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3096                                sizeof(found_key)));
3097                 /*
3098                  * unlock our path, this is safe because only this
3099                  * function is allowed to delete this snapshot
3100                  */
3101                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
3102                         if (path->nodes[i] && path->locks[i]) {
3103                                 path->locks[i] = 0;
3104                                 btrfs_tree_unlock(path->nodes[i]);
3105                         }
3106                 }
3107         }
3108         while(1) {
3109                 wret = walk_down_tree(trans, root, path, &level);
3110                 if (wret > 0)
3111                         break;
3112                 if (wret < 0)
3113                         ret = wret;
3114
3115                 wret = walk_up_tree(trans, root, path, &level,
3116                                     BTRFS_MAX_LEVEL);
3117                 if (wret > 0)
3118                         break;
3119                 if (wret < 0)
3120                         ret = wret;
3121                 if (trans->transaction->in_commit) {
3122                         ret = -EAGAIN;
3123                         break;
3124                 }
3125                 atomic_inc(&root->fs_info->throttle_gen);
3126                 wake_up(&root->fs_info->transaction_throttle);
3127         }
3128         for (i = 0; i <= orig_level; i++) {
3129                 if (path->nodes[i]) {
3130                         free_extent_buffer(path->nodes[i]);
3131                         path->nodes[i] = NULL;
3132                 }
3133         }
3134 out:
3135         btrfs_free_path(path);
3136         return ret;
3137 }
3138
3139 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3140                         struct btrfs_root *root,
3141                         struct extent_buffer *node,
3142                         struct extent_buffer *parent)
3143 {
3144         struct btrfs_path *path;
3145         int level;
3146         int parent_level;
3147         int ret = 0;
3148         int wret;
3149
3150         path = btrfs_alloc_path();
3151         BUG_ON(!path);
3152
3153         BUG_ON(!btrfs_tree_locked(parent));
3154         parent_level = btrfs_header_level(parent);
3155         extent_buffer_get(parent);
3156         path->nodes[parent_level] = parent;
3157         path->slots[parent_level] = btrfs_header_nritems(parent);
3158
3159         BUG_ON(!btrfs_tree_locked(node));
3160         level = btrfs_header_level(node);
3161         extent_buffer_get(node);
3162         path->nodes[level] = node;
3163         path->slots[level] = 0;
3164
3165         while (1) {
3166                 wret = walk_down_subtree(trans, root, path, &level);
3167                 if (wret < 0)
3168                         ret = wret;
3169                 if (wret != 0)
3170                         break;
3171
3172                 wret = walk_up_tree(trans, root, path, &level, parent_level);
3173                 if (wret < 0)
3174                         ret = wret;
3175                 if (wret != 0)
3176                         break;
3177         }
3178
3179         btrfs_free_path(path);
3180         return ret;
3181 }
3182
3183 static unsigned long calc_ra(unsigned long start, unsigned long last,
3184                              unsigned long nr)
3185 {
3186         return min(last, start + nr - 1);
3187 }
3188
3189 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
3190                                          u64 len)
3191 {
3192         u64 page_start;
3193         u64 page_end;
3194         unsigned long first_index;
3195         unsigned long last_index;
3196         unsigned long i;
3197         struct page *page;
3198         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3199         struct file_ra_state *ra;
3200         struct btrfs_ordered_extent *ordered;
3201         unsigned int total_read = 0;
3202         unsigned int total_dirty = 0;
3203         int ret = 0;
3204
3205         ra = kzalloc(sizeof(*ra), GFP_NOFS);
3206
3207         mutex_lock(&inode->i_mutex);
3208         first_index = start >> PAGE_CACHE_SHIFT;
3209         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3210
3211         /* make sure the dirty trick played by the caller work */
3212         ret = invalidate_inode_pages2_range(inode->i_mapping,
3213                                             first_index, last_index);
3214         if (ret)
3215                 goto out_unlock;
3216
3217         file_ra_state_init(ra, inode->i_mapping);
3218
3219         for (i = first_index ; i <= last_index; i++) {
3220                 if (total_read % ra->ra_pages == 0) {
3221                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3222                                        calc_ra(i, last_index, ra->ra_pages));
3223                 }
3224                 total_read++;
3225 again:
3226                 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
3227                         BUG_ON(1);
3228                 page = grab_cache_page(inode->i_mapping, i);
3229                 if (!page) {
3230                         ret = -ENOMEM;
3231                         goto out_unlock;
3232                 }
3233                 if (!PageUptodate(page)) {
3234                         btrfs_readpage(NULL, page);
3235                         lock_page(page);
3236                         if (!PageUptodate(page)) {
3237                                 unlock_page(page);
3238                                 page_cache_release(page);
3239                                 ret = -EIO;
3240                                 goto out_unlock;
3241                         }
3242                 }
3243                 wait_on_page_writeback(page);
3244
3245                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3246                 page_end = page_start + PAGE_CACHE_SIZE - 1;
3247                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3248
3249                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3250                 if (ordered) {
3251                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3252                         unlock_page(page);
3253                         page_cache_release(page);
3254                         btrfs_start_ordered_extent(inode, ordered, 1);
3255                         btrfs_put_ordered_extent(ordered);
3256                         goto again;
3257                 }
3258                 set_page_extent_mapped(page);
3259
3260                 btrfs_set_extent_delalloc(inode, page_start, page_end);
3261                 if (i == first_index)
3262                         set_extent_bits(io_tree, page_start, page_end,
3263                                         EXTENT_BOUNDARY, GFP_NOFS);
3264
3265                 set_page_dirty(page);
3266                 total_dirty++;
3267
3268                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3269                 unlock_page(page);
3270                 page_cache_release(page);
3271         }
3272
3273 out_unlock:
3274         kfree(ra);
3275         mutex_unlock(&inode->i_mutex);
3276         balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
3277         return ret;
3278 }
3279
3280 static int noinline relocate_data_extent(struct inode *reloc_inode,
3281                                          struct btrfs_key *extent_key,
3282                                          u64 offset)
3283 {
3284         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
3285         struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
3286         struct extent_map *em;
3287         u64 start = extent_key->objectid - offset;
3288         u64 end = start + extent_key->offset - 1;
3289
3290         em = alloc_extent_map(GFP_NOFS);
3291         BUG_ON(!em || IS_ERR(em));
3292
3293         em->start = start;
3294         em->len = extent_key->offset;
3295         em->block_len = extent_key->offset;
3296         em->block_start = extent_key->objectid;
3297         em->bdev = root->fs_info->fs_devices->latest_bdev;
3298         set_bit(EXTENT_FLAG_PINNED, &em->flags);
3299
3300         /* setup extent map to cheat btrfs_readpage */
3301         lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
3302         while (1) {
3303                 int ret;
3304                 spin_lock(&em_tree->lock);
3305                 ret = add_extent_mapping(em_tree, em);
3306                 spin_unlock(&em_tree->lock);
3307                 if (ret != -EEXIST) {
3308                         free_extent_map(em);
3309                         break;
3310                 }
3311                 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
3312         }
3313         unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
3314
3315         return relocate_inode_pages(reloc_inode, start, extent_key->offset);
3316 }
3317
3318 struct btrfs_ref_path {
3319         u64 extent_start;
3320         u64 nodes[BTRFS_MAX_LEVEL];
3321         u64 root_objectid;
3322         u64 root_generation;
3323         u64 owner_objectid;
3324         u32 num_refs;
3325         int lowest_level;
3326         int current_level;
3327         int shared_level;
3328
3329         struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
3330         u64 new_nodes[BTRFS_MAX_LEVEL];
3331 };
3332
3333 struct disk_extent {
3334         u64 ram_bytes;
3335         u64 disk_bytenr;
3336         u64 disk_num_bytes;
3337         u64 offset;
3338         u64 num_bytes;
3339         u8 compression;
3340         u8 encryption;
3341         u16 other_encoding;
3342 };
3343
3344 static int is_cowonly_root(u64 root_objectid)
3345 {
3346         if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
3347             root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
3348             root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
3349             root_objectid == BTRFS_DEV_TREE_OBJECTID ||
3350             root_objectid == BTRFS_TREE_LOG_OBJECTID)
3351                 return 1;
3352         return 0;
3353 }
3354
3355 static int noinline __next_ref_path(struct btrfs_trans_handle *trans,
3356                                     struct btrfs_root *extent_root,
3357                                     struct btrfs_ref_path *ref_path,
3358                                     int first_time)
3359 {
3360         struct extent_buffer *leaf;
3361         struct btrfs_path *path;
3362         struct btrfs_extent_ref *ref;
3363         struct btrfs_key key;
3364         struct btrfs_key found_key;
3365         u64 bytenr;
3366         u32 nritems;
3367         int level;
3368         int ret = 1;
3369
3370         path = btrfs_alloc_path();
3371         if (!path)
3372                 return -ENOMEM;
3373
3374         if (first_time) {
3375                 ref_path->lowest_level = -1;
3376                 ref_path->current_level = -1;
3377                 ref_path->shared_level = -1;
3378                 goto walk_up;
3379         }
3380 walk_down:
3381         level = ref_path->current_level - 1;
3382         while (level >= -1) {
3383                 u64 parent;
3384                 if (level < ref_path->lowest_level)
3385                         break;
3386
3387                 if (level >= 0) {
3388                         bytenr = ref_path->nodes[level];
3389                 } else {
3390                         bytenr = ref_path->extent_start;
3391                 }
3392                 BUG_ON(bytenr == 0);
3393
3394                 parent = ref_path->nodes[level + 1];
3395                 ref_path->nodes[level + 1] = 0;
3396                 ref_path->current_level = level;
3397                 BUG_ON(parent == 0);
3398
3399                 key.objectid = bytenr;
3400                 key.offset = parent + 1;
3401                 key.type = BTRFS_EXTENT_REF_KEY;
3402
3403                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
3404                 if (ret < 0)
3405                         goto out;
3406                 BUG_ON(ret == 0);
3407
3408                 leaf = path->nodes[0];
3409                 nritems = btrfs_header_nritems(leaf);
3410                 if (path->slots[0] >= nritems) {
3411                         ret = btrfs_next_leaf(extent_root, path);
3412                         if (ret < 0)
3413                                 goto out;
3414                         if (ret > 0)
3415                                 goto next;
3416                         leaf = path->nodes[0];
3417                 }
3418
3419                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3420                 if (found_key.objectid == bytenr &&
3421                     found_key.type == BTRFS_EXTENT_REF_KEY) {
3422                         if (level < ref_path->shared_level)
3423                                 ref_path->shared_level = level;
3424                         goto found;
3425                 }
3426 next:
3427                 level--;
3428                 btrfs_release_path(extent_root, path);
3429                 if (need_resched()) {
3430                         cond_resched();
3431                 }
3432         }
3433         /* reached lowest level */
3434         ret = 1;
3435         goto out;
3436 walk_up:
3437         level = ref_path->current_level;
3438         while (level < BTRFS_MAX_LEVEL - 1) {
3439                 u64 ref_objectid;
3440                 if (level >= 0) {
3441                         bytenr = ref_path->nodes[level];
3442                 } else {
3443                         bytenr = ref_path->extent_start;
3444                 }
3445                 BUG_ON(bytenr == 0);
3446
3447                 key.objectid = bytenr;
3448                 key.offset = 0;
3449                 key.type = BTRFS_EXTENT_REF_KEY;
3450
3451                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
3452                 if (ret < 0)
3453                         goto out;
3454
3455                 leaf = path->nodes[0];
3456                 nritems = btrfs_header_nritems(leaf);
3457                 if (path->slots[0] >= nritems) {
3458                         ret = btrfs_next_leaf(extent_root, path);
3459                         if (ret < 0)
3460                                 goto out;
3461                         if (ret > 0) {
3462                                 /* the extent was freed by someone */
3463                                 if (ref_path->lowest_level == level)
3464                                         goto out;
3465                                 btrfs_release_path(extent_root, path);
3466                                 goto walk_down;
3467                         }
3468                         leaf = path->nodes[0];
3469                 }
3470
3471                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3472                 if (found_key.objectid != bytenr ||
3473                                 found_key.type != BTRFS_EXTENT_REF_KEY) {
3474                         /* the extent was freed by someone */
3475                         if (ref_path->lowest_level == level) {
3476                                 ret = 1;
3477                                 goto out;
3478                         }
3479                         btrfs_release_path(extent_root, path);
3480                         goto walk_down;
3481                 }
3482 found:
3483                 ref = btrfs_item_ptr(leaf, path->slots[0],
3484                                 struct btrfs_extent_ref);
3485                 ref_objectid = btrfs_ref_objectid(leaf, ref);
3486                 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
3487                         if (first_time) {
3488                                 level = (int)ref_objectid;
3489                                 BUG_ON(level >= BTRFS_MAX_LEVEL);
3490                                 ref_path->lowest_level = level;
3491                                 ref_path->current_level = level;
3492                                 ref_path->nodes[level] = bytenr;
3493                         } else {
3494                                 WARN_ON(ref_objectid != level);
3495                         }
3496                 } else {
3497                         WARN_ON(level != -1);
3498                 }
3499                 first_time = 0;
3500
3501                 if (ref_path->lowest_level == level) {
3502                         ref_path->owner_objectid = ref_objectid;
3503                         ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
3504                 }
3505
3506                 /*
3507                  * the block is tree root or the block isn't in reference
3508                  * counted tree.
3509                  */
3510                 if (found_key.objectid == found_key.offset ||
3511                     is_cowonly_root(btrfs_ref_root(leaf, ref))) {
3512                         ref_path->root_objectid = btrfs_ref_root(leaf, ref);
3513                         ref_path->root_generation =
3514                                 btrfs_ref_generation(leaf, ref);
3515                         if (level < 0) {
3516                                 /* special reference from the tree log */
3517                                 ref_path->nodes[0] = found_key.offset;
3518                                 ref_path->current_level = 0;
3519                         }
3520                         ret = 0;
3521                         goto out;
3522                 }
3523
3524                 level++;
3525                 BUG_ON(ref_path->nodes[level] != 0);
3526                 ref_path->nodes[level] = found_key.offset;
3527                 ref_path->current_level = level;
3528
3529                 /*
3530                  * the reference was created in the running transaction,
3531                  * no need to continue walking up.
3532                  */
3533                 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
3534                         ref_path->root_objectid = btrfs_ref_root(leaf, ref);
3535                         ref_path->root_generation =
3536                                 btrfs_ref_generation(leaf, ref);
3537                         ret = 0;
3538                         goto out;
3539                 }
3540
3541                 btrfs_release_path(extent_root, path);
3542                 if (need_resched()) {
3543                         cond_resched();
3544                 }
3545         }
3546         /* reached max tree level, but no tree root found. */
3547         BUG();
3548 out:
3549         btrfs_free_path(path);
3550         return ret;
3551 }
3552
3553 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
3554                                 struct btrfs_root *extent_root,
3555                                 struct btrfs_ref_path *ref_path,
3556                                 u64 extent_start)
3557 {
3558         memset(ref_path, 0, sizeof(*ref_path));
3559         ref_path->extent_start = extent_start;
3560
3561         return __next_ref_path(trans, extent_root, ref_path, 1);
3562 }
3563
3564 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
3565                                struct btrfs_root *extent_root,
3566                                struct btrfs_ref_path *ref_path)
3567 {
3568         return __next_ref_path(trans, extent_root, ref_path, 0);
3569 }
3570
3571 static int noinline get_new_locations(struct inode *reloc_inode,
3572                                       struct btrfs_key *extent_key,
3573                                       u64 offset, int no_fragment,
3574                                       struct disk_extent **extents,
3575                                       int *nr_extents)
3576 {
3577         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
3578         struct btrfs_path *path;
3579         struct btrfs_file_extent_item *fi;
3580         struct extent_buffer *leaf;
3581         struct disk_extent *exts = *extents;
3582         struct btrfs_key found_key;
3583         u64 cur_pos;
3584         u64 last_byte;
3585         u32 nritems;
3586         int nr = 0;
3587         int max = *nr_extents;
3588         int ret;
3589
3590         WARN_ON(!no_fragment && *extents);
3591         if (!exts) {
3592                 max = 1;
3593                 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
3594                 if (!exts)
3595                         return -ENOMEM;
3596         }
3597
3598         path = btrfs_alloc_path();
3599         BUG_ON(!path);
3600
3601         cur_pos = extent_key->objectid - offset;
3602         last_byte = extent_key->objectid + extent_key->offset;
3603         ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
3604                                        cur_pos, 0);
3605         if (ret < 0)
3606                 goto out;
3607         if (ret > 0) {
3608                 ret = -ENOENT;
3609                 goto out;
3610         }
3611
3612         while (1) {
3613                 leaf = path->nodes[0];
3614                 nritems = btrfs_header_nritems(leaf);
3615                 if (path->slots[0] >= nritems) {
3616                         ret = btrfs_next_leaf(root, path);
3617                         if (ret < 0)
3618                                 goto out;
3619                         if (ret > 0)
3620                                 break;
3621                         leaf = path->nodes[0];
3622                 }
3623
3624                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3625                 if (found_key.offset != cur_pos ||
3626                     found_key.type != BTRFS_EXTENT_DATA_KEY ||
3627                     found_key.objectid != reloc_inode->i_ino)
3628                         break;
3629
3630                 fi = btrfs_item_ptr(leaf, path->slots[0],
3631                                     struct btrfs_file_extent_item);
3632                 if (btrfs_file_extent_type(leaf, fi) !=
3633                     BTRFS_FILE_EXTENT_REG ||
3634                     btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
3635                         break;
3636
3637                 if (nr == max) {
3638                         struct disk_extent *old = exts;
3639                         max *= 2;
3640                         exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
3641                         memcpy(exts, old, sizeof(*exts) * nr);
3642                         if (old != *extents)
3643                                 kfree(old);
3644                 }
3645
3646                 exts[nr].disk_bytenr =
3647                         btrfs_file_extent_disk_bytenr(leaf, fi);
3648                 exts[nr].disk_num_bytes =
3649                         btrfs_file_extent_disk_num_bytes(leaf, fi);
3650                 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
3651                 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
3652                 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
3653                 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
3654                 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
3655                 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
3656                                                                            fi);
3657                 WARN_ON(exts[nr].offset > 0);
3658                 WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
3659
3660                 cur_pos += exts[nr].num_bytes;
3661                 nr++;
3662
3663                 if (cur_pos + offset >= last_byte)
3664                         break;
3665
3666                 if (no_fragment) {
3667                         ret = 1;
3668                         goto out;
3669                 }
3670                 path->slots[0]++;
3671         }
3672
3673         WARN_ON(cur_pos + offset > last_byte);
3674         if (cur_pos + offset < last_byte) {
3675                 ret = -ENOENT;
3676                 goto out;
3677         }
3678         ret = 0;
3679 out:
3680         btrfs_free_path(path);
3681         if (ret) {
3682                 if (exts != *extents)
3683                         kfree(exts);
3684         } else {
3685                 *extents = exts;
3686                 *nr_extents = nr;
3687         }
3688         return ret;
3689 }
3690
3691 static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
3692                                         struct btrfs_root *root,
3693                                         struct btrfs_path *path,
3694                                         struct btrfs_key *extent_key,
3695                                         struct btrfs_key *leaf_key,
3696                                         struct btrfs_ref_path *ref_path,
3697                                         struct disk_extent *new_extents,
3698                                         int nr_extents)
3699 {
3700         struct extent_buffer *leaf;
3701         struct btrfs_file_extent_item *fi;
3702         struct inode *inode = NULL;
3703         struct btrfs_key key;
3704         u64 lock_start = 0;
3705         u64 lock_end = 0;
3706         u64 num_bytes;
3707         u64 ext_offset;
3708         u64 first_pos;
3709         u32 nritems;
3710         int nr_scaned = 0;
3711         int extent_locked = 0;
3712         int ret;
3713
3714         memcpy(&key, leaf_key, sizeof(key));
3715         first_pos = INT_LIMIT(loff_t) - extent_key->offset;
3716         if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
3717                 if (key.objectid < ref_path->owner_objectid ||
3718                     (key.objectid == ref_path->owner_objectid &&
3719                      key.type < BTRFS_EXTENT_DATA_KEY)) {
3720                         key.objectid = ref_path->owner_objectid;
3721                         key.type = BTRFS_EXTENT_DATA_KEY;
3722                         key.offset = 0;
3723                 }
3724         }
3725
3726         while (1) {
3727                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3728                 if (ret < 0)
3729                         goto out;
3730
3731                 leaf = path->nodes[0];
3732                 nritems = btrfs_header_nritems(leaf);
3733 next:
3734                 if (extent_locked && ret > 0) {
3735                         /*
3736                          * the file extent item was modified by someone
3737                          * before the extent got locked.
3738                          */
3739                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3740                                       lock_end, GFP_NOFS);
3741                         extent_locked = 0;
3742                 }
3743
3744                 if (path->slots[0] >= nritems) {
3745                         if (++nr_scaned > 2)
3746                                 break;
3747
3748                         BUG_ON(extent_locked);
3749                         ret = btrfs_next_leaf(root, path);
3750                         if (ret < 0)
3751                                 goto out;
3752                         if (ret > 0)
3753                                 break;
3754                         leaf = path->nodes[0];
3755                         nritems = btrfs_header_nritems(leaf);
3756                 }
3757
3758                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3759
3760                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
3761                         if ((key.objectid > ref_path->owner_objectid) ||
3762                             (key.objectid == ref_path->owner_objectid &&
3763                              key.type > BTRFS_EXTENT_DATA_KEY) ||
3764                             (key.offset >= first_pos + extent_key->offset))
3765                                 break;
3766                 }
3767
3768                 if (inode && key.objectid != inode->i_ino) {
3769                         BUG_ON(extent_locked);
3770                         btrfs_release_path(root, path);
3771                         mutex_unlock(&inode->i_mutex);
3772                         iput(inode);
3773                         inode = NULL;
3774                         continue;
3775                 }
3776
3777                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
3778                         path->slots[0]++;
3779                         ret = 1;
3780                         goto next;
3781                 }
3782                 fi = btrfs_item_ptr(leaf, path->slots[0],
3783                                     struct btrfs_file_extent_item);
3784                 if ((btrfs_file_extent_type(leaf, fi) !=
3785                      BTRFS_FILE_EXTENT_REG) ||
3786                     (btrfs_file_extent_disk_bytenr(leaf, fi) !=
3787                      extent_key->objectid)) {
3788                         path->slots[0]++;
3789                         ret = 1;
3790                         goto next;
3791                 }
3792
3793                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
3794                 ext_offset = btrfs_file_extent_offset(leaf, fi);
3795
3796                 if (first_pos > key.offset - ext_offset)
3797                         first_pos = key.offset - ext_offset;
3798
3799                 if (!extent_locked) {
3800                         lock_start = key.offset;
3801                         lock_end = lock_start + num_bytes - 1;
3802                 } else {
3803                         if (lock_start > key.offset ||
3804                             lock_end + 1 < key.offset + num_bytes) {
3805                                 unlock_extent(&BTRFS_I(inode)->io_tree,
3806                                               lock_start, lock_end, GFP_NOFS);
3807                                 extent_locked = 0;
3808                         }
3809                 }
3810
3811                 if (!inode) {
3812                         btrfs_release_path(root, path);
3813
3814                         inode = btrfs_iget_locked(root->fs_info->sb,
3815                                                   key.objectid, root);
3816                         if (inode->i_state & I_NEW) {
3817                                 BTRFS_I(inode)->root = root;
3818                                 BTRFS_I(inode)->location.objectid =
3819                                         key.objectid;
3820                                 BTRFS_I(inode)->location.type =
3821                                         BTRFS_INODE_ITEM_KEY;
3822                                 BTRFS_I(inode)->location.offset = 0;
3823                                 btrfs_read_locked_inode(inode);
3824                                 unlock_new_inode(inode);
3825                         }
3826                         /*
3827                          * some code call btrfs_commit_transaction while
3828                          * holding the i_mutex, so we can't use mutex_lock
3829                          * here.
3830                          */
3831                         if (is_bad_inode(inode) ||
3832                             !mutex_trylock(&inode->i_mutex)) {
3833                                 iput(inode);
3834                                 inode = NULL;
3835                                 key.offset = (u64)-1;
3836                                 goto skip;
3837                         }
3838                 }
3839
3840                 if (!extent_locked) {
3841                         struct btrfs_ordered_extent *ordered;
3842
3843                         btrfs_release_path(root, path);
3844
3845                         lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3846                                     lock_end, GFP_NOFS);
3847                         ordered = btrfs_lookup_first_ordered_extent(inode,
3848                                                                     lock_end);
3849                         if (ordered &&
3850                             ordered->file_offset <= lock_end &&
3851                             ordered->file_offset + ordered->len > lock_start) {
3852                                 unlock_extent(&BTRFS_I(inode)->io_tree,
3853                                               lock_start, lock_end, GFP_NOFS);
3854                                 btrfs_start_ordered_extent(inode, ordered, 1);
3855                                 btrfs_put_ordered_extent(ordered);
3856                                 key.offset += num_bytes;
3857                                 goto skip;
3858                         }
3859                         if (ordered)
3860                                 btrfs_put_ordered_extent(ordered);
3861
3862                         extent_locked = 1;
3863                         continue;
3864                 }
3865
3866                 if (nr_extents == 1) {
3867                         /* update extent pointer in place */
3868                         btrfs_set_file_extent_generation(leaf, fi,
3869                                                 trans->transid);
3870                         btrfs_set_file_extent_disk_bytenr(leaf, fi,
3871                                                 new_extents[0].disk_bytenr);
3872                         btrfs_set_file_extent_disk_num_bytes(leaf, fi,
3873                                                 new_extents[0].disk_num_bytes);
3874                         btrfs_set_file_extent_ram_bytes(leaf, fi,
3875                                                 new_extents[0].ram_bytes);
3876                         ext_offset += new_extents[0].offset;
3877                         btrfs_set_file_extent_offset(leaf, fi, ext_offset);
3878                         btrfs_mark_buffer_dirty(leaf);
3879
3880                         btrfs_drop_extent_cache(inode, key.offset,
3881                                                 key.offset + num_bytes - 1, 0);
3882
3883                         ret = btrfs_inc_extent_ref(trans, root,
3884                                                 new_extents[0].disk_bytenr,
3885                                                 new_extents[0].disk_num_bytes,
3886                                                 leaf->start,
3887                                                 root->root_key.objectid,
3888                                                 trans->transid,
3889                                                 key.objectid);
3890                         BUG_ON(ret);
3891
3892                         ret = btrfs_free_extent(trans, root,
3893                                                 extent_key->objectid,
3894                                                 extent_key->offset,
3895                                                 leaf->start,
3896                                                 btrfs_header_owner(leaf),
3897                                                 btrfs_header_generation(leaf),
3898                                                 key.objectid, 0);
3899                         BUG_ON(ret);
3900
3901                         btrfs_release_path(root, path);
3902                         key.offset += num_bytes;
3903                 } else {
3904                         u64 alloc_hint;
3905                         u64 extent_len;
3906                         int i;
3907                         /*
3908                          * drop old extent pointer at first, then insert the
3909                          * new pointers one bye one
3910                          */
3911                         btrfs_release_path(root, path);
3912                         ret = btrfs_drop_extents(trans, root, inode, key.offset,
3913                                                  key.offset + num_bytes,
3914                                                  key.offset, &alloc_hint);
3915                         BUG_ON(ret);
3916
3917                         for (i = 0; i < nr_extents; i++) {
3918                                 if (ext_offset >= new_extents[i].num_bytes) {
3919                                         ext_offset -= new_extents[i].num_bytes;
3920                                         continue;
3921                                 }
3922                                 extent_len = min(new_extents[i].num_bytes -
3923                                                  ext_offset, num_bytes);
3924
3925                                 ret = btrfs_insert_empty_item(trans, root,
3926                                                               path, &key,
3927                                                               sizeof(*fi));
3928                                 BUG_ON(ret);
3929
3930                                 leaf = path->nodes[0];
3931                                 fi = btrfs_item_ptr(leaf, path->slots[0],
3932                                                 struct btrfs_file_extent_item);
3933                                 btrfs_set_file_extent_generation(leaf, fi,
3934                                                         trans->transid);
3935                                 btrfs_set_file_extent_type(leaf, fi,
3936                                                         BTRFS_FILE_EXTENT_REG);
3937                                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
3938                                                 new_extents[i].disk_bytenr);
3939                                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
3940                                                 new_extents[i].disk_num_bytes);
3941                                 btrfs_set_file_extent_ram_bytes(leaf, fi,
3942                                                 new_extents[i].ram_bytes);
3943
3944                                 btrfs_set_file_extent_compression(leaf, fi,
3945                                                 new_extents[i].compression);
3946                                 btrfs_set_file_extent_encryption(leaf, fi,
3947                                                 new_extents[i].encryption);
3948                                 btrfs_set_file_extent_other_encoding(leaf, fi,
3949                                                 new_extents[i].other_encoding);
3950
3951                                 btrfs_set_file_extent_num_bytes(leaf, fi,
3952                                                         extent_len);
3953                                 ext_offset += new_extents[i].offset;
3954                                 btrfs_set_file_extent_offset(leaf, fi,
3955                                                         ext_offset);
3956                                 btrfs_mark_buffer_dirty(leaf);
3957
3958                                 btrfs_drop_extent_cache(inode, key.offset,
3959                                                 key.offset + extent_len - 1, 0);
3960
3961                                 ret = btrfs_inc_extent_ref(trans, root,
3962                                                 new_extents[i].disk_bytenr,
3963                                                 new_extents[i].disk_num_bytes,
3964                                                 leaf->start,
3965                                                 root->root_key.objectid,
3966                                                 trans->transid, key.objectid);
3967                                 BUG_ON(ret);
3968                                 btrfs_release_path(root, path);
3969
3970                                 inode_add_bytes(inode, extent_len);
3971
3972                                 ext_offset = 0;
3973                                 num_bytes -= extent_len;
3974                                 key.offset += extent_len;
3975
3976                                 if (num_bytes == 0)
3977                                         break;
3978                         }
3979                         BUG_ON(i >= nr_extents);
3980                 }
3981
3982                 if (extent_locked) {
3983                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3984                                       lock_end, GFP_NOFS);
3985                         extent_locked = 0;
3986                 }
3987 skip:
3988                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
3989                     key.offset >= first_pos + extent_key->offset)
3990                         break;
3991
3992                 cond_resched();
3993         }
3994         ret = 0;
3995 out:
3996         btrfs_release_path(root, path);
3997         if (inode) {
3998                 mutex_unlock(&inode->i_mutex);
3999                 if (extent_locked) {
4000                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4001                                       lock_end, GFP_NOFS);
4002                 }
4003                 iput(inode);
4004         }
4005         return ret;
4006 }
4007
4008 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
4009                                struct btrfs_root *root,
4010                                struct extent_buffer *buf, u64 orig_start)
4011 {
4012         int level;
4013         int ret;
4014
4015         BUG_ON(btrfs_header_generation(buf) != trans->transid);
4016         BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
4017
4018         level = btrfs_header_level(buf);
4019         if (level == 0) {
4020                 struct btrfs_leaf_ref *ref;
4021                 struct btrfs_leaf_ref *orig_ref;
4022
4023                 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
4024                 if (!orig_ref)
4025                         return -ENOENT;
4026
4027                 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
4028                 if (!ref) {
4029                         btrfs_free_leaf_ref(root, orig_ref);
4030                         return -ENOMEM;
4031                 }
4032
4033                 ref->nritems = orig_ref->nritems;
4034                 memcpy(ref->extents, orig_ref->extents,
4035                         sizeof(ref->extents[0]) * ref->nritems);
4036
4037                 btrfs_free_leaf_ref(root, orig_ref);
4038
4039                 ref->root_gen = trans->transid;
4040                 ref->bytenr = buf->start;
4041                 ref->owner = btrfs_header_owner(buf);
4042                 ref->generation = btrfs_header_generation(buf);
4043                 ret = btrfs_add_leaf_ref(root, ref, 0);
4044                 WARN_ON(ret);
4045                 btrfs_free_leaf_ref(root, ref);
4046         }
4047         return 0;
4048 }
4049
4050 static int noinline invalidate_extent_cache(struct btrfs_root *root,
4051                                         struct extent_buffer *leaf,
4052                                         struct btrfs_block_group_cache *group,
4053                                         struct btrfs_root *target_root)
4054 {
4055         struct btrfs_key key;
4056         struct inode *inode = NULL;
4057         struct btrfs_file_extent_item *fi;
4058         u64 num_bytes;
4059         u64 skip_objectid = 0;
4060         u32 nritems;
4061         u32 i;
4062
4063         nritems = btrfs_header_nritems(leaf);
4064         for (i = 0; i < nritems; i++) {
4065                 btrfs_item_key_to_cpu(leaf, &key, i);
4066                 if (key.objectid == skip_objectid ||
4067                     key.type != BTRFS_EXTENT_DATA_KEY)
4068                         continue;
4069                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4070                 if (btrfs_file_extent_type(leaf, fi) ==
4071                     BTRFS_FILE_EXTENT_INLINE)
4072                         continue;
4073                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4074                         continue;
4075                 if (!inode || inode->i_ino != key.objectid) {
4076                         iput(inode);
4077                         inode = btrfs_ilookup(target_root->fs_info->sb,
4078                                               key.objectid, target_root, 1);
4079                 }
4080                 if (!inode) {
4081                         skip_objectid = key.objectid;
4082                         continue;
4083                 }
4084                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4085
4086                 lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4087                             key.offset + num_bytes - 1, GFP_NOFS);
4088                 btrfs_drop_extent_cache(inode, key.offset,
4089                                         key.offset + num_bytes - 1, 1);
4090                 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4091                               key.offset + num_bytes - 1, GFP_NOFS);
4092                 cond_resched();
4093         }
4094         iput(inode);
4095         return 0;
4096 }
4097
4098 static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
4099                                         struct btrfs_root *root,
4100                                         struct extent_buffer *leaf,
4101                                         struct btrfs_block_group_cache *group,
4102                                         struct inode *reloc_inode)
4103 {
4104         struct btrfs_key key;
4105         struct btrfs_key extent_key;
4106         struct btrfs_file_extent_item *fi;
4107         struct btrfs_leaf_ref *ref;
4108         struct disk_extent *new_extent;
4109         u64 bytenr;
4110         u64 num_bytes;
4111         u32 nritems;
4112         u32 i;
4113         int ext_index;
4114         int nr_extent;
4115         int ret;
4116
4117         new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
4118         BUG_ON(!new_extent);
4119
4120         ref = btrfs_lookup_leaf_ref(root, leaf->start);
4121         BUG_ON(!ref);
4122
4123         ext_index = -1;
4124         nritems = btrfs_header_nritems(leaf);
4125         for (i = 0; i < nritems; i++) {
4126                 btrfs_item_key_to_cpu(leaf, &key, i);
4127                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
4128                         continue;
4129                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4130                 if (btrfs_file_extent_type(leaf, fi) ==
4131                     BTRFS_FILE_EXTENT_INLINE)
4132                         continue;
4133                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4134                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4135                 if (bytenr == 0)
4136                         continue;
4137
4138                 ext_index++;
4139                 if (bytenr >= group->key.objectid + group->key.offset ||
4140                     bytenr + num_bytes <= group->key.objectid)
4141                         continue;
4142
4143                 extent_key.objectid = bytenr;
4144                 extent_key.offset = num_bytes;
4145                 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4146                 nr_extent = 1;
4147                 ret = get_new_locations(reloc_inode, &extent_key,
4148                                         group->key.objectid, 1,
4149                                         &new_extent, &nr_extent);
4150                 if (ret > 0)
4151                         continue;
4152                 BUG_ON(ret < 0);
4153
4154                 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
4155                 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
4156                 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
4157                 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
4158
4159                 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
4160                 btrfs_set_file_extent_ram_bytes(leaf, fi,
4161                                                 new_extent->ram_bytes);
4162                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4163                                                 new_extent->disk_bytenr);
4164                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4165                                                 new_extent->disk_num_bytes);
4166                 new_extent->offset += btrfs_file_extent_offset(leaf, fi);
4167                 btrfs_set_file_extent_offset(leaf, fi, new_extent->offset);
4168                 btrfs_mark_buffer_dirty(leaf);
4169
4170                 ret = btrfs_inc_extent_ref(trans, root,
4171                                         new_extent->disk_bytenr,
4172                                         new_extent->disk_num_bytes,
4173                                         leaf->start,
4174                                         root->root_key.objectid,
4175                                         trans->transid, key.objectid);
4176                 BUG_ON(ret);
4177                 ret = btrfs_free_extent(trans, root,
4178                                         bytenr, num_bytes, leaf->start,
4179                                         btrfs_header_owner(leaf),
4180                                         btrfs_header_generation(leaf),
4181                                         key.objectid, 0);
4182                 BUG_ON(ret);
4183                 cond_resched();
4184         }
4185         kfree(new_extent);
4186         BUG_ON(ext_index + 1 != ref->nritems);
4187         btrfs_free_leaf_ref(root, ref);
4188         return 0;
4189 }
4190
4191 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
4192                           struct btrfs_root *root)
4193 {
4194         struct btrfs_root *reloc_root;
4195         int ret;
4196
4197         if (root->reloc_root) {
4198                 reloc_root = root->reloc_root;
4199                 root->reloc_root = NULL;
4200                 list_add(&reloc_root->dead_list,
4201                          &root->fs_info->dead_reloc_roots);
4202
4203                 btrfs_set_root_bytenr(&reloc_root->root_item,
4204                                       reloc_root->node->start);
4205                 btrfs_set_root_level(&root->root_item,
4206                                      btrfs_header_level(reloc_root->node));
4207                 memset(&reloc_root->root_item.drop_progress, 0,
4208                         sizeof(struct btrfs_disk_key));
4209                 reloc_root->root_item.drop_level = 0;
4210
4211                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4212                                         &reloc_root->root_key,
4213                                         &reloc_root->root_item);
4214                 BUG_ON(ret);
4215         }
4216         return 0;
4217 }
4218
4219 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
4220 {
4221         struct btrfs_trans_handle *trans;
4222         struct btrfs_root *reloc_root;
4223         struct btrfs_root *prev_root = NULL;
4224         struct list_head dead_roots;
4225         int ret;
4226         unsigned long nr;
4227
4228         INIT_LIST_HEAD(&dead_roots);
4229         list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
4230
4231         while (!list_empty(&dead_roots)) {
4232                 reloc_root = list_entry(dead_roots.prev,
4233                                         struct btrfs_root, dead_list);
4234                 list_del_init(&reloc_root->dead_list);
4235
4236                 BUG_ON(reloc_root->commit_root != NULL);
4237                 while (1) {
4238                         trans = btrfs_join_transaction(root, 1);
4239                         BUG_ON(!trans);
4240
4241                         mutex_lock(&root->fs_info->drop_mutex);
4242                         ret = btrfs_drop_snapshot(trans, reloc_root);
4243                         if (ret != -EAGAIN)
4244                                 break;
4245                         mutex_unlock(&root->fs_info->drop_mutex);
4246
4247                         nr = trans->blocks_used;
4248                         ret = btrfs_end_transaction(trans, root);
4249                         BUG_ON(ret);
4250                         btrfs_btree_balance_dirty(root, nr);
4251                 }
4252
4253                 free_extent_buffer(reloc_root->node);
4254
4255                 ret = btrfs_del_root(trans, root->fs_info->tree_root,
4256                                      &reloc_root->root_key);
4257                 BUG_ON(ret);
4258                 mutex_unlock(&root->fs_info->drop_mutex);
4259
4260                 nr = trans->blocks_used;
4261                 ret = btrfs_end_transaction(trans, root);
4262                 BUG_ON(ret);
4263                 btrfs_btree_balance_dirty(root, nr);
4264
4265                 kfree(prev_root);
4266                 prev_root = reloc_root;
4267         }
4268         if (prev_root) {
4269                 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
4270                 kfree(prev_root);
4271         }
4272         return 0;
4273 }
4274
4275 int btrfs_add_dead_reloc_root(struct btrfs_root *root)
4276 {
4277         list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
4278         return 0;
4279 }
4280
4281 int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
4282 {
4283         struct btrfs_root *reloc_root;
4284         struct btrfs_trans_handle *trans;
4285         struct btrfs_key location;
4286         int found;
4287         int ret;
4288
4289         mutex_lock(&root->fs_info->tree_reloc_mutex);
4290         ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
4291         BUG_ON(ret);
4292         found = !list_empty(&root->fs_info->dead_reloc_roots);
4293         mutex_unlock(&root->fs_info->tree_reloc_mutex);
4294
4295         if (found) {
4296                 trans = btrfs_start_transaction(root, 1);
4297                 BUG_ON(!trans);
4298                 ret = btrfs_commit_transaction(trans, root);
4299                 BUG_ON(ret);
4300         }
4301
4302         location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
4303         location.offset = (u64)-1;
4304         location.type = BTRFS_ROOT_ITEM_KEY;
4305
4306         reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
4307         BUG_ON(!reloc_root);
4308         btrfs_orphan_cleanup(reloc_root);
4309         return 0;
4310 }
4311
4312 static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
4313                                     struct btrfs_root *root)
4314 {
4315         struct btrfs_root *reloc_root;
4316         struct extent_buffer *eb;
4317         struct btrfs_root_item *root_item;
4318         struct btrfs_key root_key;
4319         int ret;
4320
4321         BUG_ON(!root->ref_cows);
4322         if (root->reloc_root)
4323                 return 0;
4324
4325         root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
4326         BUG_ON(!root_item);
4327
4328         ret = btrfs_copy_root(trans, root, root->commit_root,
4329                               &eb, BTRFS_TREE_RELOC_OBJECTID);
4330         BUG_ON(ret);
4331
4332         root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4333         root_key.offset = root->root_key.objectid;
4334         root_key.type = BTRFS_ROOT_ITEM_KEY;
4335
4336         memcpy(root_item, &root->root_item, sizeof(root_item));
4337         btrfs_set_root_refs(root_item, 0);
4338         btrfs_set_root_bytenr(root_item, eb->start);
4339         btrfs_set_root_level(root_item, btrfs_header_level(eb));
4340         btrfs_set_root_generation(root_item, trans->transid);
4341
4342         btrfs_tree_unlock(eb);
4343         free_extent_buffer(eb);
4344
4345         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
4346                                 &root_key, root_item);
4347         BUG_ON(ret);
4348         kfree(root_item);
4349
4350         reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
4351                                                  &root_key);
4352         BUG_ON(!reloc_root);
4353         reloc_root->last_trans = trans->transid;
4354         reloc_root->commit_root = NULL;
4355         reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
4356
4357         root->reloc_root = reloc_root;
4358         return 0;
4359 }
4360
4361 /*
4362  * Core function of space balance.
4363  *
4364  * The idea is using reloc trees to relocate tree blocks in reference
4365  * counted roots. There is one reloc tree for each subvol, and all
4366  * reloc trees share same root key objectid. Reloc trees are snapshots
4367  * of the latest committed roots of subvols (root->commit_root).
4368  *
4369  * To relocate a tree block referenced by a subvol, there are two steps.
4370  * COW the block through subvol's reloc tree, then update block pointer
4371  * in the subvol to point to the new block. Since all reloc trees share
4372  * same root key objectid, doing special handing for tree blocks owned
4373  * by them is easy. Once a tree block has been COWed in one reloc tree,
4374  * we can use the resulting new block directly when the same block is
4375  * required to COW again through other reloc trees. By this way, relocated
4376  * tree blocks are shared between reloc trees, so they are also shared
4377  * between subvols.
4378  */
4379 static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
4380                                       struct btrfs_root *root,
4381                                       struct btrfs_path *path,
4382                                       struct btrfs_key *first_key,
4383                                       struct btrfs_ref_path *ref_path,
4384                                       struct btrfs_block_group_cache *group,
4385                                       struct inode *reloc_inode)
4386 {
4387         struct btrfs_root *reloc_root;
4388         struct extent_buffer *eb = NULL;
4389         struct btrfs_key *keys;
4390         u64 *nodes;
4391         int level;
4392         int shared_level;
4393         int lowest_level = 0;
4394         int ret;
4395
4396         if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
4397                 lowest_level = ref_path->owner_objectid;
4398
4399         if (!root->ref_cows) {
4400                 path->lowest_level = lowest_level;
4401                 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
4402                 BUG_ON(ret < 0);
4403                 path->lowest_level = 0;
4404                 btrfs_release_path(root, path);
4405                 return 0;
4406         }
4407
4408         mutex_lock(&root->fs_info->tree_reloc_mutex);
4409         ret = init_reloc_tree(trans, root);
4410         BUG_ON(ret);
4411         reloc_root = root->reloc_root;
4412
4413         shared_level = ref_path->shared_level;
4414         ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
4415
4416         keys = ref_path->node_keys;
4417         nodes = ref_path->new_nodes;
4418         memset(&keys[shared_level + 1], 0,
4419                sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
4420         memset(&nodes[shared_level + 1], 0,
4421                sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
4422
4423         if (nodes[lowest_level] == 0) {
4424                 path->lowest_level = lowest_level;
4425                 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
4426                                         0, 1);
4427                 BUG_ON(ret);
4428                 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
4429                         eb = path->nodes[level];
4430                         if (!eb || eb == reloc_root->node)
4431                                 break;
4432                         nodes[level] = eb->start;
4433                         if (level == 0)
4434                                 btrfs_item_key_to_cpu(eb, &keys[level], 0);
4435                         else
4436                                 btrfs_node_key_to_cpu(eb, &keys[level], 0);
4437                 }
4438                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4439                         eb = path->nodes[0];
4440                         ret = replace_extents_in_leaf(trans, reloc_root, eb,
4441                                                       group, reloc_inode);
4442                         BUG_ON(ret);
4443                 }
4444                 btrfs_release_path(reloc_root, path);
4445         } else {
4446                 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
4447                                        lowest_level);
4448                 BUG_ON(ret);
4449         }
4450
4451         /*
4452          * replace tree blocks in the fs tree with tree blocks in
4453          * the reloc tree.
4454          */
4455         ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
4456         BUG_ON(ret < 0);
4457
4458         if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4459                 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
4460                                         0, 0);
4461                 BUG_ON(ret);
4462                 extent_buffer_get(path->nodes[0]);
4463                 eb = path->nodes[0];
4464                 btrfs_release_path(reloc_root, path);
4465                 ret = invalidate_extent_cache(reloc_root, eb, group, root);
4466                 BUG_ON(ret);
4467                 free_extent_buffer(eb);
4468         }
4469
4470         mutex_unlock(&root->fs_info->tree_reloc_mutex);
4471         path->lowest_level = 0;
4472         return 0;
4473 }
4474
4475 static int noinline relocate_tree_block(struct btrfs_trans_handle *trans,
4476                                         struct btrfs_root *root,
4477                                         struct btrfs_path *path,
4478                                         struct btrfs_key *first_key,
4479                                         struct btrfs_ref_path *ref_path)
4480 {
4481         int ret;
4482
4483         ret = relocate_one_path(trans, root, path, first_key,
4484                                 ref_path, NULL, NULL);
4485         BUG_ON(ret);
4486
4487         if (root == root->fs_info->extent_root)
4488                 btrfs_extent_post_op(trans, root);
4489
4490         return 0;
4491 }
4492
4493 static int noinline del_extent_zero(struct btrfs_trans_handle *trans,
4494                                     struct btrfs_root *extent_root,
4495                                     struct btrfs_path *path,
4496                                     struct btrfs_key *extent_key)
4497 {
4498         int ret;
4499
4500         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
4501         if (ret)
4502                 goto out;
4503         ret = btrfs_del_item(trans, extent_root, path);
4504 out:
4505         btrfs_release_path(extent_root, path);
4506         return ret;
4507 }
4508
4509 static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info,
4510                                                 struct btrfs_ref_path *ref_path)
4511 {
4512         struct btrfs_key root_key;
4513
4514         root_key.objectid = ref_path->root_objectid;
4515         root_key.type = BTRFS_ROOT_ITEM_KEY;
4516         if (is_cowonly_root(ref_path->root_objectid))
4517                 root_key.offset = 0;
4518         else
4519                 root_key.offset = (u64)-1;
4520
4521         return btrfs_read_fs_root_no_name(fs_info, &root_key);
4522 }
4523
4524 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
4525                                         struct btrfs_path *path,
4526                                         struct btrfs_key *extent_key,
4527                                         struct btrfs_block_group_cache *group,
4528                                         struct inode *reloc_inode, int pass)
4529 {
4530         struct btrfs_trans_handle *trans;
4531         struct btrfs_root *found_root;
4532         struct btrfs_ref_path *ref_path = NULL;
4533         struct disk_extent *new_extents = NULL;
4534         int nr_extents = 0;
4535         int loops;
4536         int ret;
4537         int level;
4538         struct btrfs_key first_key;
4539         u64 prev_block = 0;
4540
4541
4542         trans = btrfs_start_transaction(extent_root, 1);
4543         BUG_ON(!trans);
4544
4545         if (extent_key->objectid == 0) {
4546                 ret = del_extent_zero(trans, extent_root, path, extent_key);
4547                 goto out;
4548         }
4549
4550         ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
4551         if (!ref_path) {
4552                ret = -ENOMEM;
4553                goto out;
4554         }
4555
4556         for (loops = 0; ; loops++) {
4557                 if (loops == 0) {
4558                         ret = btrfs_first_ref_path(trans, extent_root, ref_path,
4559                                                    extent_key->objectid);
4560                 } else {
4561                         ret = btrfs_next_ref_path(trans, extent_root, ref_path);
4562                 }
4563                 if (ret < 0)
4564                         goto out;
4565                 if (ret > 0)
4566                         break;
4567
4568                 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
4569                     ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
4570                         continue;
4571
4572                 found_root = read_ref_root(extent_root->fs_info, ref_path);
4573                 BUG_ON(!found_root);
4574                 /*
4575                  * for reference counted tree, only process reference paths
4576                  * rooted at the latest committed root.
4577                  */
4578                 if (found_root->ref_cows &&
4579                     ref_path->root_generation != found_root->root_key.offset)
4580                         continue;
4581
4582                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4583                         if (pass == 0) {
4584                                 /*
4585                                  * copy data extents to new locations
4586                                  */
4587                                 u64 group_start = group->key.objectid;
4588                                 ret = relocate_data_extent(reloc_inode,
4589                                                            extent_key,
4590                                                            group_start);
4591                                 if (ret < 0)
4592                                         goto out;
4593                                 break;
4594                         }
4595                         level = 0;
4596                 } else {
4597                         level = ref_path->owner_objectid;
4598                 }
4599
4600                 if (prev_block != ref_path->nodes[level]) {
4601                         struct extent_buffer *eb;
4602                         u64 block_start = ref_path->nodes[level];
4603                         u64 block_size = btrfs_level_size(found_root, level);
4604
4605                         eb = read_tree_block(found_root, block_start,
4606                                              block_size, 0);
4607                         btrfs_tree_lock(eb);
4608                         BUG_ON(level != btrfs_header_level(eb));
4609
4610                         if (level == 0)
4611                                 btrfs_item_key_to_cpu(eb, &first_key, 0);
4612                         else
4613                                 btrfs_node_key_to_cpu(eb, &first_key, 0);
4614
4615                         btrfs_tree_unlock(eb);
4616                         free_extent_buffer(eb);
4617                         prev_block = block_start;
4618                 }
4619
4620                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4621                     pass >= 2) {
4622                         /*
4623                          * use fallback method to process the remaining
4624                          * references.
4625                          */
4626                         if (!new_extents) {
4627                                 u64 group_start = group->key.objectid;
4628                                 ret = get_new_locations(reloc_inode,
4629                                                         extent_key,
4630                                                         group_start, 0,
4631                                                         &new_extents,
4632                                                         &nr_extents);
4633                                 if (ret < 0)
4634                                         goto out;
4635                         }
4636                         btrfs_record_root_in_trans(found_root);
4637                         ret = replace_one_extent(trans, found_root,
4638                                                 path, extent_key,
4639                                                 &first_key, ref_path,
4640                                                 new_extents, nr_extents);
4641                         if (ret < 0)
4642                                 goto out;
4643                         continue;
4644                 }
4645
4646                 btrfs_record_root_in_trans(found_root);
4647                 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
4648                         ret = relocate_tree_block(trans, found_root, path,
4649                                                   &first_key, ref_path);
4650                 } else {
4651                         /*
4652                          * try to update data extent references while
4653                          * keeping metadata shared between snapshots.
4654                          */
4655                         ret = relocate_one_path(trans, found_root, path,
4656                                                 &first_key, ref_path,
4657                                                 group, reloc_inode);
4658                 }
4659                 if (ret < 0)
4660                         goto out;
4661         }
4662         ret = 0;
4663 out:
4664         btrfs_end_transaction(trans, extent_root);
4665         kfree(new_extents);
4666         kfree(ref_path);
4667         return ret;
4668 }
4669
4670 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
4671 {
4672         u64 num_devices;
4673         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
4674                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
4675
4676         num_devices = root->fs_info->fs_devices->num_devices;
4677         if (num_devices == 1) {
4678                 stripped |= BTRFS_BLOCK_GROUP_DUP;
4679                 stripped = flags & ~stripped;
4680
4681                 /* turn raid0 into single device chunks */
4682                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
4683                         return stripped;
4684
4685                 /* turn mirroring into duplication */
4686                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
4687                              BTRFS_BLOCK_GROUP_RAID10))
4688                         return stripped | BTRFS_BLOCK_GROUP_DUP;
4689                 return flags;
4690         } else {
4691                 /* they already had raid on here, just return */
4692                 if (flags & stripped)
4693                         return flags;
4694
4695                 stripped |= BTRFS_BLOCK_GROUP_DUP;
4696                 stripped = flags & ~stripped;
4697
4698                 /* switch duplicated blocks with raid1 */
4699                 if (flags & BTRFS_BLOCK_GROUP_DUP)
4700                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
4701
4702                 /* turn single device chunks into raid0 */
4703                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
4704         }
4705         return flags;
4706 }
4707
4708 int __alloc_chunk_for_shrink(struct btrfs_root *root,
4709                      struct btrfs_block_group_cache *shrink_block_group,
4710                      int force)
4711 {
4712         struct btrfs_trans_handle *trans;
4713         u64 new_alloc_flags;
4714         u64 calc;
4715
4716         spin_lock(&shrink_block_group->lock);
4717         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
4718                 spin_unlock(&shrink_block_group->lock);
4719
4720                 trans = btrfs_start_transaction(root, 1);
4721                 spin_lock(&shrink_block_group->lock);
4722
4723                 new_alloc_flags = update_block_group_flags(root,
4724                                                    shrink_block_group->flags);
4725                 if (new_alloc_flags != shrink_block_group->flags) {
4726                         calc =
4727                              btrfs_block_group_used(&shrink_block_group->item);
4728                 } else {
4729                         calc = shrink_block_group->key.offset;
4730                 }
4731                 spin_unlock(&shrink_block_group->lock);
4732
4733                 do_chunk_alloc(trans, root->fs_info->extent_root,
4734                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
4735
4736                 btrfs_end_transaction(trans, root);
4737         } else
4738                 spin_unlock(&shrink_block_group->lock);
4739         return 0;
4740 }
4741
4742 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
4743                                  struct btrfs_root *root,
4744                                  u64 objectid, u64 size)
4745 {
4746         struct btrfs_path *path;
4747         struct btrfs_inode_item *item;
4748         struct extent_buffer *leaf;
4749         int ret;
4750
4751         path = btrfs_alloc_path();
4752         if (!path)
4753                 return -ENOMEM;
4754
4755         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
4756         if (ret)
4757                 goto out;
4758
4759         leaf = path->nodes[0];
4760         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
4761         memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
4762         btrfs_set_inode_generation(leaf, item, 1);
4763         btrfs_set_inode_size(leaf, item, size);
4764         btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
4765         btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM);
4766         btrfs_mark_buffer_dirty(leaf);
4767         btrfs_release_path(root, path);
4768 out:
4769         btrfs_free_path(path);
4770         return ret;
4771 }
4772
4773 static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
4774                                         struct btrfs_block_group_cache *group)
4775 {
4776         struct inode *inode = NULL;
4777         struct btrfs_trans_handle *trans;
4778         struct btrfs_root *root;
4779         struct btrfs_key root_key;
4780         u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
4781         int err = 0;
4782
4783         root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
4784         root_key.type = BTRFS_ROOT_ITEM_KEY;
4785         root_key.offset = (u64)-1;
4786         root = btrfs_read_fs_root_no_name(fs_info, &root_key);
4787         if (IS_ERR(root))
4788                 return ERR_CAST(root);
4789
4790         trans = btrfs_start_transaction(root, 1);
4791         BUG_ON(!trans);
4792
4793         err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
4794         if (err)
4795                 goto out;
4796
4797         err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
4798         BUG_ON(err);
4799
4800         err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
4801                                        group->key.offset, 0, group->key.offset,
4802                                        0, 0, 0);
4803         BUG_ON(err);
4804
4805         inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
4806         if (inode->i_state & I_NEW) {
4807                 BTRFS_I(inode)->root = root;
4808                 BTRFS_I(inode)->location.objectid = objectid;
4809                 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
4810                 BTRFS_I(inode)->location.offset = 0;
4811                 btrfs_read_locked_inode(inode);
4812                 unlock_new_inode(inode);
4813                 BUG_ON(is_bad_inode(inode));
4814         } else {
4815                 BUG_ON(1);
4816         }
4817
4818         err = btrfs_orphan_add(trans, inode);
4819 out:
4820         btrfs_end_transaction(trans, root);
4821         if (err) {
4822                 if (inode)
4823                         iput(inode);
4824                 inode = ERR_PTR(err);
4825         }
4826         return inode;
4827 }
4828
4829 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
4830 {
4831         struct btrfs_trans_handle *trans;
4832         struct btrfs_path *path;
4833         struct btrfs_fs_info *info = root->fs_info;
4834         struct extent_buffer *leaf;
4835         struct inode *reloc_inode;
4836         struct btrfs_block_group_cache *block_group;
4837         struct btrfs_key key;
4838         u64 cur_byte;
4839         u64 total_found;
4840         u32 nritems;
4841         int ret;
4842         int progress;
4843         int pass = 0;
4844
4845         root = root->fs_info->extent_root;
4846
4847         block_group = btrfs_lookup_block_group(info, group_start);
4848         BUG_ON(!block_group);
4849
4850         printk("btrfs relocating block group %llu flags %llu\n",
4851                (unsigned long long)block_group->key.objectid,
4852                (unsigned long long)block_group->flags);
4853
4854         path = btrfs_alloc_path();
4855         BUG_ON(!path);
4856
4857         reloc_inode = create_reloc_inode(info, block_group);
4858         BUG_ON(IS_ERR(reloc_inode));
4859
4860         __alloc_chunk_for_shrink(root, block_group, 1);
4861         block_group->ro = 1;
4862         block_group->space_info->total_bytes -= block_group->key.offset;
4863
4864         btrfs_start_delalloc_inodes(info->tree_root);
4865         btrfs_wait_ordered_extents(info->tree_root, 0);
4866 again:
4867         total_found = 0;
4868         progress = 0;
4869         key.objectid = block_group->key.objectid;
4870         key.offset = 0;
4871         key.type = 0;
4872         cur_byte = key.objectid;
4873
4874         trans = btrfs_start_transaction(info->tree_root, 1);
4875         btrfs_commit_transaction(trans, info->tree_root);
4876
4877         mutex_lock(&root->fs_info->cleaner_mutex);
4878         btrfs_clean_old_snapshots(info->tree_root);
4879         btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
4880         mutex_unlock(&root->fs_info->cleaner_mutex);
4881
4882         while(1) {
4883                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4884                 if (ret < 0)
4885                         goto out;
4886 next:
4887                 leaf = path->nodes[0];
4888                 nritems = btrfs_header_nritems(leaf);
4889                 if (path->slots[0] >= nritems) {
4890                         ret = btrfs_next_leaf(root, path);
4891                         if (ret < 0)
4892                                 goto out;
4893                         if (ret == 1) {
4894                                 ret = 0;
4895                                 break;
4896                         }
4897                         leaf = path->nodes[0];
4898                         nritems = btrfs_header_nritems(leaf);
4899                 }
4900
4901                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4902
4903                 if (key.objectid >= block_group->key.objectid +
4904                     block_group->key.offset)
4905                         break;
4906
4907                 if (progress && need_resched()) {
4908                         btrfs_release_path(root, path);
4909                         cond_resched();
4910                         progress = 0;
4911                         continue;
4912                 }
4913                 progress = 1;
4914
4915                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
4916                     key.objectid + key.offset <= cur_byte) {
4917                         path->slots[0]++;
4918                         goto next;
4919                 }
4920
4921                 total_found++;
4922                 cur_byte = key.objectid + key.offset;
4923                 btrfs_release_path(root, path);
4924
4925                 __alloc_chunk_for_shrink(root, block_group, 0);
4926                 ret = relocate_one_extent(root, path, &key, block_group,
4927                                           reloc_inode, pass);
4928                 BUG_ON(ret < 0);
4929
4930                 key.objectid = cur_byte;
4931                 key.type = 0;
4932                 key.offset = 0;
4933         }
4934
4935         btrfs_release_path(root, path);
4936
4937         if (pass == 0) {
4938                 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
4939                 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
4940                 WARN_ON(reloc_inode->i_mapping->nrpages);
4941         }
4942
4943         if (total_found > 0) {
4944                 printk("btrfs found %llu extents in pass %d\n",
4945                        (unsigned long long)total_found, pass);
4946                 pass++;
4947                 goto again;
4948         }
4949
4950         /* delete reloc_inode */
4951         iput(reloc_inode);
4952
4953         /* unpin extents in this range */
4954         trans = btrfs_start_transaction(info->tree_root, 1);
4955         btrfs_commit_transaction(trans, info->tree_root);
4956
4957         spin_lock(&block_group->lock);
4958         WARN_ON(block_group->pinned > 0);
4959         WARN_ON(block_group->reserved > 0);
4960         WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
4961         spin_unlock(&block_group->lock);
4962         ret = 0;
4963 out:
4964         btrfs_free_path(path);
4965         return ret;
4966 }
4967
4968 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
4969                            struct btrfs_key *key)
4970 {
4971         int ret = 0;
4972         struct btrfs_key found_key;
4973         struct extent_buffer *leaf;
4974         int slot;
4975
4976         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
4977         if (ret < 0)
4978                 goto out;
4979
4980         while(1) {
4981                 slot = path->slots[0];
4982                 leaf = path->nodes[0];
4983                 if (slot >= btrfs_header_nritems(leaf)) {
4984                         ret = btrfs_next_leaf(root, path);
4985                         if (ret == 0)
4986                                 continue;
4987                         if (ret < 0)
4988                                 goto out;
4989                         break;
4990                 }
4991                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4992
4993                 if (found_key.objectid >= key->objectid &&
4994                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
4995                         ret = 0;
4996                         goto out;
4997                 }
4998                 path->slots[0]++;
4999         }
5000         ret = -ENOENT;
5001 out:
5002         return ret;
5003 }
5004
5005 int btrfs_free_block_groups(struct btrfs_fs_info *info)
5006 {
5007         struct btrfs_block_group_cache *block_group;
5008         struct rb_node *n;
5009
5010         spin_lock(&info->block_group_cache_lock);
5011         while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
5012                 block_group = rb_entry(n, struct btrfs_block_group_cache,
5013                                        cache_node);
5014
5015                 spin_unlock(&info->block_group_cache_lock);
5016                 btrfs_remove_free_space_cache(block_group);
5017                 spin_lock(&info->block_group_cache_lock);
5018
5019                 rb_erase(&block_group->cache_node,
5020                          &info->block_group_cache_tree);
5021                 down_write(&block_group->space_info->groups_sem);
5022                 list_del(&block_group->list);
5023                 up_write(&block_group->space_info->groups_sem);
5024                 kfree(block_group);
5025         }
5026         spin_unlock(&info->block_group_cache_lock);
5027         return 0;
5028 }
5029
5030 int btrfs_read_block_groups(struct btrfs_root *root)
5031 {
5032         struct btrfs_path *path;
5033         int ret;
5034         struct btrfs_block_group_cache *cache;
5035         struct btrfs_fs_info *info = root->fs_info;
5036         struct btrfs_space_info *space_info;
5037         struct btrfs_key key;
5038         struct btrfs_key found_key;
5039         struct extent_buffer *leaf;
5040
5041         root = info->extent_root;
5042         key.objectid = 0;
5043         key.offset = 0;
5044         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5045         path = btrfs_alloc_path();
5046         if (!path)
5047                 return -ENOMEM;
5048
5049         while(1) {
5050                 ret = find_first_block_group(root, path, &key);
5051                 if (ret > 0) {
5052                         ret = 0;
5053                         goto error;
5054                 }
5055                 if (ret != 0)
5056                         goto error;
5057
5058                 leaf = path->nodes[0];
5059                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5060                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
5061                 if (!cache) {
5062                         ret = -ENOMEM;
5063                         break;
5064                 }
5065
5066                 spin_lock_init(&cache->lock);
5067                 mutex_init(&cache->alloc_mutex);
5068                 INIT_LIST_HEAD(&cache->list);
5069                 read_extent_buffer(leaf, &cache->item,
5070                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
5071                                    sizeof(cache->item));
5072                 memcpy(&cache->key, &found_key, sizeof(found_key));
5073
5074                 key.objectid = found_key.objectid + found_key.offset;
5075                 btrfs_release_path(root, path);
5076                 cache->flags = btrfs_block_group_flags(&cache->item);
5077
5078                 ret = update_space_info(info, cache->flags, found_key.offset,
5079                                         btrfs_block_group_used(&cache->item),
5080                                         &space_info);
5081                 BUG_ON(ret);
5082                 cache->space_info = space_info;
5083                 down_write(&space_info->groups_sem);
5084                 list_add_tail(&cache->list, &space_info->block_groups);
5085                 up_write(&space_info->groups_sem);
5086
5087                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
5088                 BUG_ON(ret);
5089
5090                 set_avail_alloc_bits(root->fs_info, cache->flags);
5091         }
5092         ret = 0;
5093 error:
5094         btrfs_free_path(path);
5095         return ret;
5096 }
5097
5098 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
5099                            struct btrfs_root *root, u64 bytes_used,
5100                            u64 type, u64 chunk_objectid, u64 chunk_offset,
5101                            u64 size)
5102 {
5103         int ret;
5104         struct btrfs_root *extent_root;
5105         struct btrfs_block_group_cache *cache;
5106
5107         extent_root = root->fs_info->extent_root;
5108
5109         root->fs_info->last_trans_new_blockgroup = trans->transid;
5110
5111         cache = kzalloc(sizeof(*cache), GFP_NOFS);
5112         if (!cache)
5113                 return -ENOMEM;
5114
5115         cache->key.objectid = chunk_offset;
5116         cache->key.offset = size;
5117         spin_lock_init(&cache->lock);
5118         mutex_init(&cache->alloc_mutex);
5119         INIT_LIST_HEAD(&cache->list);
5120         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5121
5122         btrfs_set_block_group_used(&cache->item, bytes_used);
5123         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
5124         cache->flags = type;
5125         btrfs_set_block_group_flags(&cache->item, type);
5126
5127         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
5128                                 &cache->space_info);
5129         BUG_ON(ret);
5130         down_write(&cache->space_info->groups_sem);
5131         list_add_tail(&cache->list, &cache->space_info->block_groups);
5132         up_write(&cache->space_info->groups_sem);
5133
5134         ret = btrfs_add_block_group_cache(root->fs_info, cache);
5135         BUG_ON(ret);
5136
5137         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
5138                                 sizeof(cache->item));
5139         BUG_ON(ret);
5140
5141         finish_current_insert(trans, extent_root, 0);
5142         ret = del_pending_extents(trans, extent_root, 0);
5143         BUG_ON(ret);
5144         set_avail_alloc_bits(extent_root->fs_info, type);
5145
5146         return 0;
5147 }
5148
5149 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5150                              struct btrfs_root *root, u64 group_start)
5151 {
5152         struct btrfs_path *path;
5153         struct btrfs_block_group_cache *block_group;
5154         struct btrfs_key key;
5155         int ret;
5156
5157         root = root->fs_info->extent_root;
5158
5159         block_group = btrfs_lookup_block_group(root->fs_info, group_start);
5160         BUG_ON(!block_group);
5161
5162         memcpy(&key, &block_group->key, sizeof(key));
5163
5164         path = btrfs_alloc_path();
5165         BUG_ON(!path);
5166
5167         btrfs_remove_free_space_cache(block_group);
5168         rb_erase(&block_group->cache_node,
5169                  &root->fs_info->block_group_cache_tree);
5170         down_write(&block_group->space_info->groups_sem);
5171         list_del(&block_group->list);
5172         up_write(&block_group->space_info->groups_sem);
5173
5174         /*
5175         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
5176         kfree(shrink_block_group);
5177         */
5178
5179         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5180         if (ret > 0)
5181                 ret = -EIO;
5182         if (ret < 0)
5183                 goto out;
5184
5185         ret = btrfs_del_item(trans, root, path);
5186 out:
5187         btrfs_free_path(path);
5188         return ret;
5189 }