btrfs: Code cleanup
[safe/jmp/linux-2.6] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/module.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
25 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26                             *orig_root, u64 num_blocks, u64 search_start,
27                             u64 search_end, u64 hint_block,
28                             struct btrfs_key *ins, int data);
29 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
30                                  btrfs_root *extent_root);
31 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
32                                btrfs_root *extent_root);
33
34 static void reada_extent_leaves(struct btrfs_root *root,
35                                 struct btrfs_path *path, u64 limit)
36 {
37         struct btrfs_node *node;
38         int i;
39         int nritems;
40         u64 item_objectid;
41         u64 blocknr;
42         int slot;
43         int ret;
44
45         if (!path->nodes[1])
46                 return;
47         node = btrfs_buffer_node(path->nodes[1]);
48         slot = path->slots[1] + 1;
49         nritems = btrfs_header_nritems(&node->header);
50         for (i = slot; i < nritems && i < slot + 8; i++) {
51                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
52                 if (item_objectid > limit)
53                         break;
54                 blocknr = btrfs_node_blockptr(node, i);
55                 ret = readahead_tree_block(root, blocknr);
56                 if (ret)
57                         break;
58         }
59 }
60
61 static int cache_block_group(struct btrfs_root *root,
62                              struct btrfs_block_group_cache *block_group)
63 {
64         struct btrfs_path *path;
65         int ret;
66         struct btrfs_key key;
67         struct btrfs_leaf *leaf;
68         struct radix_tree_root *extent_radix;
69         int slot;
70         u64 i;
71         u64 last = 0;
72         u64 hole_size;
73         u64 limit;
74         int found = 0;
75
76         root = root->fs_info->extent_root;
77         extent_radix = &root->fs_info->extent_map_radix;
78
79         if (block_group->cached)
80                 return 0;
81         if (block_group->data)
82                 return 0;
83         path = btrfs_alloc_path();
84         if (!path)
85                 return -ENOMEM;
86         key.objectid = block_group->key.objectid;
87         key.flags = 0;
88         key.offset = 0;
89         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
90         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
91         if (ret < 0)
92                 return ret;
93         if (ret && path->slots[0] > 0)
94                 path->slots[0]--;
95         limit = block_group->key.objectid + block_group->key.offset;
96         reada_extent_leaves(root, path, limit);
97         while(1) {
98                 leaf = btrfs_buffer_leaf(path->nodes[0]);
99                 slot = path->slots[0];
100                 if (slot >= btrfs_header_nritems(&leaf->header)) {
101                         reada_extent_leaves(root, path, limit);
102                         ret = btrfs_next_leaf(root, path);
103                         if (ret == 0) {
104                                 continue;
105                         } else {
106                                 if (found) {
107                                         hole_size = block_group->key.objectid +
108                                                 block_group->key.offset - last;
109                                 } else {
110                                         last = block_group->key.objectid;
111                                         hole_size = block_group->key.offset;
112                                 }
113                                 for (i = 0; i < hole_size; i++) {
114                                         set_radix_bit(extent_radix,
115                                                       last + i);
116                                 }
117                                 break;
118                         }
119                 }
120                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
121                 if (key.objectid >= block_group->key.objectid +
122                     block_group->key.offset) {
123                         if (found) {
124                                 hole_size = block_group->key.objectid +
125                                         block_group->key.offset - last;
126                         } else {
127                                 last = block_group->key.objectid;
128                                 hole_size = block_group->key.offset;
129                         }
130                         for (i = 0; i < hole_size; i++) {
131                                 set_radix_bit(extent_radix, last + i);
132                         }
133                         break;
134                 }
135                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
136                         if (!found) {
137                                 last = key.objectid + key.offset;
138                                 found = 1;
139                         } else {
140                                 hole_size = key.objectid - last;
141                                 for (i = 0; i < hole_size; i++) {
142                                         set_radix_bit(extent_radix, last + i);
143                                 }
144                                 last = key.objectid + key.offset;
145                         }
146                 }
147                 path->slots[0]++;
148         }
149
150         block_group->cached = 1;
151         btrfs_free_path(path);
152         return 0;
153 }
154
155 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
156                                                          btrfs_fs_info *info,
157                                                          u64 blocknr)
158 {
159         struct btrfs_block_group_cache *block_group;
160         int ret;
161
162         ret = radix_tree_gang_lookup(&info->block_group_radix,
163                                      (void **)&block_group,
164                                      blocknr, 1);
165         if (ret) {
166                 if (block_group->key.objectid <= blocknr && blocknr <=
167                     block_group->key.objectid + block_group->key.offset)
168                         return block_group;
169         }
170         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
171                                      (void **)&block_group,
172                                      blocknr, 1);
173         if (ret) {
174                 if (block_group->key.objectid <= blocknr && blocknr <=
175                     block_group->key.objectid + block_group->key.offset)
176                         return block_group;
177         }
178         return NULL;
179 }
180
181 static u64 leaf_range(struct btrfs_root *root)
182 {
183         u64 size = BTRFS_LEAF_DATA_SIZE(root);
184         do_div(size, sizeof(struct btrfs_extent_item) +
185                 sizeof(struct btrfs_item));
186         return size;
187 }
188
189 static u64 find_search_start(struct btrfs_root *root,
190                              struct btrfs_block_group_cache **cache_ret,
191                              u64 search_start, int num)
192 {
193         unsigned long gang[8];
194         int ret;
195         struct btrfs_block_group_cache *cache = *cache_ret;
196         u64 last = max(search_start, cache->key.objectid);
197
198         if (cache->data)
199                 goto out;
200         if (num > 1) {
201                 last = max(last, cache->last_prealloc);
202         }
203 again:
204         cache_block_group(root, cache);
205         while(1) {
206                 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
207                                            gang, last, ARRAY_SIZE(gang));
208                 if (!ret)
209                         goto out;
210                 last = gang[ret-1] + 1;
211                 if (num > 1) {
212                         if (ret != ARRAY_SIZE(gang)) {
213                                 goto new_group;
214                         }
215                         if (gang[ret-1] - gang[0] > leaf_range(root)) {
216                                 continue;
217                         }
218                 }
219                 if (gang[0] >= cache->key.objectid + cache->key.offset) {
220                         goto new_group;
221                 }
222                 return gang[0];
223         }
224 out:
225         return max(cache->last_alloc, search_start);
226
227 new_group:
228         cache = btrfs_lookup_block_group(root->fs_info,
229                                          last + cache->key.offset - 1);
230         if (!cache) {
231                 return max((*cache_ret)->last_alloc, search_start);
232         }
233         cache = btrfs_find_block_group(root, cache,
234                                        last + cache->key.offset - 1, 0, 0);
235         *cache_ret = cache;
236         goto again;
237 }
238
239 static u64 div_factor(u64 num, int factor)
240 {
241         num *= factor;
242         do_div(num, 10);
243         return num;
244 }
245
246 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
247                                                  struct btrfs_block_group_cache
248                                                  *hint, u64 search_start,
249                                                  int data, int owner)
250 {
251         struct btrfs_block_group_cache *cache[8];
252         struct btrfs_block_group_cache *found_group = NULL;
253         struct btrfs_fs_info *info = root->fs_info;
254         struct radix_tree_root *radix;
255         struct radix_tree_root *swap_radix;
256         u64 used;
257         u64 last = 0;
258         u64 hint_last;
259         int i;
260         int ret;
261         int full_search = 0;
262         int factor = 8;
263         int data_swap = 0;
264
265         if (!owner)
266                 factor = 5;
267
268         if (data) {
269                 radix = &info->block_group_data_radix;
270                 swap_radix = &info->block_group_radix;
271         } else {
272                 radix = &info->block_group_radix;
273                 swap_radix = &info->block_group_data_radix;
274         }
275
276         if (search_start) {
277                 struct btrfs_block_group_cache *shint;
278                 shint = btrfs_lookup_block_group(info, search_start);
279                 if (shint->data == data) {
280                         used = btrfs_block_group_used(&shint->item);
281                         if (used + shint->pinned <
282                             div_factor(shint->key.offset, factor)) {
283                                 return shint;
284                         }
285                 }
286         }
287         if (hint && hint->data == data) {
288                 used = btrfs_block_group_used(&hint->item);
289                 if (used + hint->pinned <
290                     div_factor(hint->key.offset, factor)) {
291                         return hint;
292                 }
293                 if (used >= div_factor(hint->key.offset, 8)) {
294                         radix_tree_tag_clear(radix,
295                                              hint->key.objectid +
296                                              hint->key.offset - 1,
297                                              BTRFS_BLOCK_GROUP_AVAIL);
298                 }
299                 last = hint->key.offset * 3;
300                 if (hint->key.objectid >= last)
301                         last = max(search_start + hint->key.offset - 1,
302                                    hint->key.objectid - last);
303                 else
304                         last = hint->key.objectid + hint->key.offset;
305                 hint_last = last;
306         } else {
307                 if (hint)
308                         hint_last = max(hint->key.objectid, search_start);
309                 else
310                         hint_last = search_start;
311
312                 last = hint_last;
313         }
314         while(1) {
315                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
316                                                  last, ARRAY_SIZE(cache),
317                                                  BTRFS_BLOCK_GROUP_AVAIL);
318                 if (!ret)
319                         break;
320                 for (i = 0; i < ret; i++) {
321                         last = cache[i]->key.objectid +
322                                 cache[i]->key.offset;
323                         used = btrfs_block_group_used(&cache[i]->item);
324                         if (used + cache[i]->pinned <
325                             div_factor(cache[i]->key.offset, factor)) {
326                                 found_group = cache[i];
327                                 goto found;
328                         }
329                         if (used >= div_factor(cache[i]->key.offset, 8)) {
330                                 radix_tree_tag_clear(radix,
331                                                      cache[i]->key.objectid +
332                                                      cache[i]->key.offset - 1,
333                                                      BTRFS_BLOCK_GROUP_AVAIL);
334                         }
335                 }
336                 cond_resched();
337         }
338         last = hint_last;
339 again:
340         while(1) {
341                 ret = radix_tree_gang_lookup(radix, (void **)cache,
342                                              last, ARRAY_SIZE(cache));
343                 if (!ret)
344                         break;
345                 for (i = 0; i < ret; i++) {
346                         last = cache[i]->key.objectid +
347                                 cache[i]->key.offset;
348                         used = btrfs_block_group_used(&cache[i]->item);
349                         if (used + cache[i]->pinned < cache[i]->key.offset) {
350                                 found_group = cache[i];
351                                 goto found;
352                         }
353                         if (used >= cache[i]->key.offset) {
354                                 radix_tree_tag_clear(radix,
355                                                      cache[i]->key.objectid +
356                                                      cache[i]->key.offset - 1,
357                                                      BTRFS_BLOCK_GROUP_AVAIL);
358                         }
359                 }
360                 cond_resched();
361         }
362         if (!full_search) {
363                 last = search_start;
364                 full_search = 1;
365                 goto again;
366         }
367         if (!data_swap) {
368                 struct radix_tree_root *tmp = radix;
369                 data_swap = 1;
370                 radix = swap_radix;
371                 swap_radix = tmp;
372                 last = search_start;
373                 goto again;
374         }
375         if (!found_group) {
376                 ret = radix_tree_gang_lookup(radix,
377                                              (void **)&found_group, 0, 1);
378                 if (ret == 0) {
379                         ret = radix_tree_gang_lookup(swap_radix,
380                                                      (void **)&found_group,
381                                                      0, 1);
382                 }
383                 BUG_ON(ret != 1);
384         }
385 found:
386         return found_group;
387 }
388
389 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
390                                 struct btrfs_root *root,
391                                 u64 blocknr, u64 num_blocks)
392 {
393         struct btrfs_path *path;
394         int ret;
395         struct btrfs_key key;
396         struct btrfs_leaf *l;
397         struct btrfs_extent_item *item;
398         struct btrfs_key ins;
399         u32 refs;
400
401         find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0,
402                          &ins, 0);
403         path = btrfs_alloc_path();
404         BUG_ON(!path);
405         key.objectid = blocknr;
406         key.flags = 0;
407         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
408         key.offset = num_blocks;
409         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
410                                 0, 1);
411         if (ret != 0) {
412                 BUG();
413         }
414         BUG_ON(ret != 0);
415         l = btrfs_buffer_leaf(path->nodes[0]);
416         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
417         refs = btrfs_extent_refs(item);
418         btrfs_set_extent_refs(item, refs + 1);
419         btrfs_mark_buffer_dirty(path->nodes[0]);
420
421         btrfs_release_path(root->fs_info->extent_root, path);
422         btrfs_free_path(path);
423         finish_current_insert(trans, root->fs_info->extent_root);
424         del_pending_extents(trans, root->fs_info->extent_root);
425         return 0;
426 }
427
428 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
429                              struct btrfs_root *root, u64 blocknr,
430                              u64 num_blocks, u32 *refs)
431 {
432         struct btrfs_path *path;
433         int ret;
434         struct btrfs_key key;
435         struct btrfs_leaf *l;
436         struct btrfs_extent_item *item;
437
438         path = btrfs_alloc_path();
439         key.objectid = blocknr;
440         key.offset = num_blocks;
441         key.flags = 0;
442         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
443         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
444                                 0, 0);
445         if (ret != 0)
446                 BUG();
447         l = btrfs_buffer_leaf(path->nodes[0]);
448         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
449         *refs = btrfs_extent_refs(item);
450         btrfs_release_path(root->fs_info->extent_root, path);
451         btrfs_free_path(path);
452         return 0;
453 }
454
455 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
456                        struct btrfs_root *root)
457 {
458         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
459 }
460
461 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
462                   struct buffer_head *buf)
463 {
464         u64 blocknr;
465         struct btrfs_node *buf_node;
466         struct btrfs_leaf *buf_leaf;
467         struct btrfs_disk_key *key;
468         struct btrfs_file_extent_item *fi;
469         int i;
470         int leaf;
471         int ret;
472
473         if (!root->ref_cows)
474                 return 0;
475         buf_node = btrfs_buffer_node(buf);
476         leaf = btrfs_is_leaf(buf_node);
477         buf_leaf = btrfs_buffer_leaf(buf);
478         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
479                 if (leaf) {
480                         u64 disk_blocknr;
481                         key = &buf_leaf->items[i].key;
482                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
483                                 continue;
484                         fi = btrfs_item_ptr(buf_leaf, i,
485                                             struct btrfs_file_extent_item);
486                         if (btrfs_file_extent_type(fi) ==
487                             BTRFS_FILE_EXTENT_INLINE)
488                                 continue;
489                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
490                         if (disk_blocknr == 0)
491                                 continue;
492                         ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
493                                     btrfs_file_extent_disk_num_blocks(fi));
494                         BUG_ON(ret);
495                 } else {
496                         blocknr = btrfs_node_blockptr(buf_node, i);
497                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
498                         BUG_ON(ret);
499                 }
500         }
501         return 0;
502 }
503
504 static int write_one_cache_group(struct btrfs_trans_handle *trans,
505                                  struct btrfs_root *root,
506                                  struct btrfs_path *path,
507                                  struct btrfs_block_group_cache *cache)
508 {
509         int ret;
510         int pending_ret;
511         struct btrfs_root *extent_root = root->fs_info->extent_root;
512         struct btrfs_block_group_item *bi;
513         struct btrfs_key ins;
514
515         find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0);
516         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
517         BUG_ON(ret);
518         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
519                             struct btrfs_block_group_item);
520         memcpy(bi, &cache->item, sizeof(*bi));
521         mark_buffer_dirty(path->nodes[0]);
522         btrfs_release_path(extent_root, path);
523
524         finish_current_insert(trans, extent_root);
525         pending_ret = del_pending_extents(trans, extent_root);
526         if (ret)
527                 return ret;
528         if (pending_ret)
529                 return pending_ret;
530         if (cache->data)
531                 cache->last_alloc = cache->first_free;
532         return 0;
533
534 }
535
536 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
537                                    struct btrfs_root *root,
538                                    struct radix_tree_root *radix)
539 {
540         struct btrfs_block_group_cache *cache[8];
541         int ret;
542         int err = 0;
543         int werr = 0;
544         int i;
545         struct btrfs_path *path;
546
547         path = btrfs_alloc_path();
548         if (!path)
549                 return -ENOMEM;
550
551         while(1) {
552                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
553                                                  0, ARRAY_SIZE(cache),
554                                                  BTRFS_BLOCK_GROUP_DIRTY);
555                 if (!ret)
556                         break;
557                 for (i = 0; i < ret; i++) {
558                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
559                                              cache[i]->key.offset - 1,
560                                              BTRFS_BLOCK_GROUP_DIRTY);
561                         err = write_one_cache_group(trans, root,
562                                                     path, cache[i]);
563                         if (err)
564                                 werr = err;
565                 }
566         }
567         btrfs_free_path(path);
568         return werr;
569 }
570
571 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
572                                    struct btrfs_root *root)
573 {
574         int ret;
575         int ret2;
576         ret = write_dirty_block_radix(trans, root,
577                                       &root->fs_info->block_group_radix);
578         ret2 = write_dirty_block_radix(trans, root,
579                                       &root->fs_info->block_group_data_radix);
580         if (ret)
581                 return ret;
582         if (ret2)
583                 return ret2;
584         return 0;
585 }
586
587 static int update_block_group(struct btrfs_trans_handle *trans,
588                               struct btrfs_root *root,
589                               u64 blocknr, u64 num, int alloc, int mark_free,
590                               int data)
591 {
592         struct btrfs_block_group_cache *cache;
593         struct btrfs_fs_info *info = root->fs_info;
594         u64 total = num;
595         u64 old_val;
596         u64 block_in_group;
597         u64 i;
598         int ret;
599
600         while(total) {
601                 cache = btrfs_lookup_block_group(info, blocknr);
602                 if (!cache) {
603                         return -1;
604                 }
605                 block_in_group = blocknr - cache->key.objectid;
606                 WARN_ON(block_in_group > cache->key.offset);
607                 radix_tree_tag_set(cache->radix, cache->key.objectid +
608                                    cache->key.offset - 1,
609                                    BTRFS_BLOCK_GROUP_DIRTY);
610
611                 old_val = btrfs_block_group_used(&cache->item);
612                 num = min(total, cache->key.offset - block_in_group);
613                 if (alloc) {
614                         if (blocknr > cache->last_alloc)
615                                 cache->last_alloc = blocknr;
616                         if (!cache->data) {
617                                 for (i = 0; i < num; i++) {
618                                         clear_radix_bit(&info->extent_map_radix,
619                                                         blocknr + i);
620                                 }
621                         }
622                         if (cache->data != data &&
623                             old_val < (cache->key.offset >> 1)) {
624                                 cache->data = data;
625                                 radix_tree_delete(cache->radix,
626                                                   cache->key.objectid +
627                                                   cache->key.offset - 1);
628
629                                 if (data) {
630                                         cache->radix =
631                                                 &info->block_group_data_radix;
632                                         cache->item.flags |=
633                                                 BTRFS_BLOCK_GROUP_DATA;
634                                 } else {
635                                         cache->radix = &info->block_group_radix;
636                                         cache->item.flags &=
637                                                 ~BTRFS_BLOCK_GROUP_DATA;
638                                 }
639                                 ret = radix_tree_insert(cache->radix,
640                                                         cache->key.objectid +
641                                                         cache->key.offset - 1,
642                                                         (void *)cache);
643                         }
644                         old_val += num;
645                 } else {
646                         old_val -= num;
647                         if (blocknr < cache->first_free)
648                                 cache->first_free = blocknr;
649                         if (!cache->data && mark_free) {
650                                 for (i = 0; i < num; i++) {
651                                         set_radix_bit(&info->extent_map_radix,
652                                                       blocknr + i);
653                                 }
654                         }
655                         if (old_val < (cache->key.offset >> 1) &&
656                             old_val + num >= (cache->key.offset >> 1)) {
657                                 radix_tree_tag_set(cache->radix,
658                                                    cache->key.objectid +
659                                                    cache->key.offset - 1,
660                                                    BTRFS_BLOCK_GROUP_AVAIL);
661                         }
662                 }
663                 btrfs_set_block_group_used(&cache->item, old_val);
664                 total -= num;
665                 blocknr += num;
666         }
667         return 0;
668 }
669
670 static int try_remove_page(struct address_space *mapping, unsigned long index)
671 {
672         int ret;
673         ret = invalidate_mapping_pages(mapping, index, index);
674         return ret;
675 }
676
677 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
678                                btrfs_root *root)
679 {
680         unsigned long gang[8];
681         struct inode *btree_inode = root->fs_info->btree_inode;
682         struct btrfs_block_group_cache *block_group;
683         u64 first = 0;
684         int ret;
685         int i;
686         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
687         struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
688
689         while(1) {
690                 ret = find_first_radix_bit(pinned_radix, gang, 0,
691                                            ARRAY_SIZE(gang));
692                 if (!ret)
693                         break;
694                 if (!first)
695                         first = gang[0];
696                 for (i = 0; i < ret; i++) {
697                         clear_radix_bit(pinned_radix, gang[i]);
698                         block_group = btrfs_lookup_block_group(root->fs_info,
699                                                                gang[i]);
700                         if (block_group) {
701                                 WARN_ON(block_group->pinned == 0);
702                                 block_group->pinned--;
703                                 if (gang[i] < block_group->last_alloc)
704                                         block_group->last_alloc = gang[i];
705                                 if (gang[i] < block_group->last_prealloc)
706                                         block_group->last_prealloc = gang[i];
707                                 if (!block_group->data)
708                                         set_radix_bit(extent_radix, gang[i]);
709                         }
710                         try_remove_page(btree_inode->i_mapping,
711                                         gang[i] << (PAGE_CACHE_SHIFT -
712                                                     btree_inode->i_blkbits));
713                 }
714         }
715         return 0;
716 }
717
718 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
719                                  btrfs_root *extent_root)
720 {
721         struct btrfs_key ins;
722         struct btrfs_extent_item extent_item;
723         int i;
724         int ret;
725         u64 super_blocks_used;
726         struct btrfs_fs_info *info = extent_root->fs_info;
727
728         btrfs_set_extent_refs(&extent_item, 1);
729         ins.offset = 1;
730         ins.flags = 0;
731         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
732         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
733
734         for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
735                 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
736                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
737                 btrfs_set_super_blocks_used(info->disk_super,
738                                             super_blocks_used + 1);
739                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
740                                         sizeof(extent_item));
741                 BUG_ON(ret);
742         }
743         extent_root->fs_info->extent_tree_insert_nr = 0;
744         extent_root->fs_info->extent_tree_prealloc_nr = 0;
745         return 0;
746 }
747
748 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
749 {
750         int err;
751         struct btrfs_header *header;
752         struct buffer_head *bh;
753
754         if (!pending) {
755                 bh = btrfs_find_tree_block(root, blocknr);
756                 if (bh) {
757                         if (buffer_uptodate(bh)) {
758                                 u64 transid =
759                                     root->fs_info->running_transaction->transid;
760                                 header = btrfs_buffer_header(bh);
761                                 if (btrfs_header_generation(header) ==
762                                     transid) {
763                                         btrfs_block_release(root, bh);
764                                         return 0;
765                                 }
766                         }
767                         btrfs_block_release(root, bh);
768                 }
769                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
770                 if (!err) {
771                         struct btrfs_block_group_cache *cache;
772                         cache = btrfs_lookup_block_group(root->fs_info,
773                                                          blocknr);
774                         if (cache)
775                                 cache->pinned++;
776                 }
777         } else {
778                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
779         }
780         BUG_ON(err < 0);
781         return 0;
782 }
783
784 /*
785  * remove an extent from the root, returns 0 on success
786  */
787 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
788                          *root, u64 blocknr, u64 num_blocks, int pin,
789                          int mark_free)
790 {
791         struct btrfs_path *path;
792         struct btrfs_key key;
793         struct btrfs_fs_info *info = root->fs_info;
794         struct btrfs_root *extent_root = info->extent_root;
795         int ret;
796         struct btrfs_extent_item *ei;
797         struct btrfs_key ins;
798         u32 refs;
799
800         key.objectid = blocknr;
801         key.flags = 0;
802         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
803         key.offset = num_blocks;
804
805         find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0);
806         path = btrfs_alloc_path();
807         BUG_ON(!path);
808
809         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
810         if (ret) {
811                 BUG();
812         }
813         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
814                             struct btrfs_extent_item);
815         BUG_ON(ei->refs == 0);
816         refs = btrfs_extent_refs(ei) - 1;
817         btrfs_set_extent_refs(ei, refs);
818         btrfs_mark_buffer_dirty(path->nodes[0]);
819         if (refs == 0) {
820                 u64 super_blocks_used;
821
822                 if (pin) {
823                         ret = pin_down_block(root, blocknr, 0);
824                         BUG_ON(ret);
825                 }
826
827                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
828                 btrfs_set_super_blocks_used(info->disk_super,
829                                             super_blocks_used - num_blocks);
830                 ret = btrfs_del_item(trans, extent_root, path);
831                 if (ret)
832                         BUG();
833                 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
834                                          mark_free, 0);
835                 BUG_ON(ret);
836         }
837         btrfs_free_path(path);
838         finish_current_insert(trans, extent_root);
839         return ret;
840 }
841
842 /*
843  * find all the blocks marked as pending in the radix tree and remove
844  * them from the extent map
845  */
846 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
847                                btrfs_root *extent_root)
848 {
849         int ret;
850         int wret;
851         int err = 0;
852         unsigned long gang[4];
853         int i;
854         struct radix_tree_root *pending_radix;
855         struct radix_tree_root *pinned_radix;
856         struct btrfs_block_group_cache *cache;
857
858         pending_radix = &extent_root->fs_info->pending_del_radix;
859         pinned_radix = &extent_root->fs_info->pinned_radix;
860
861         while(1) {
862                 ret = find_first_radix_bit(pending_radix, gang, 0,
863                                            ARRAY_SIZE(gang));
864                 if (!ret)
865                         break;
866                 for (i = 0; i < ret; i++) {
867                         wret = set_radix_bit(pinned_radix, gang[i]);
868                         if (wret == 0) {
869                                 cache =
870                                   btrfs_lookup_block_group(extent_root->fs_info,
871                                                            gang[i]);
872                                 if (cache)
873                                         cache->pinned++;
874                         }
875                         if (wret < 0) {
876                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
877                                        wret);
878                                 BUG_ON(wret < 0);
879                         }
880                         wret = clear_radix_bit(pending_radix, gang[i]);
881                         BUG_ON(wret);
882                         wret = __free_extent(trans, extent_root,
883                                              gang[i], 1, 0, 0);
884                         if (wret)
885                                 err = wret;
886                 }
887         }
888         return err;
889 }
890
891 /*
892  * remove an extent from the root, returns 0 on success
893  */
894 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
895                       *root, u64 blocknr, u64 num_blocks, int pin)
896 {
897         struct btrfs_root *extent_root = root->fs_info->extent_root;
898         int pending_ret;
899         int ret;
900
901         if (root == extent_root) {
902                 pin_down_block(root, blocknr, 1);
903                 return 0;
904         }
905         ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
906         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
907         return ret ? ret : pending_ret;
908 }
909
910 /*
911  * walks the btree of allocated extents and find a hole of a given size.
912  * The key ins is changed to record the hole:
913  * ins->objectid == block start
914  * ins->flags = BTRFS_EXTENT_ITEM_KEY
915  * ins->offset == number of blocks
916  * Any available blocks before search_start are skipped.
917  */
918 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
919                             *orig_root, u64 num_blocks, u64 search_start, u64
920                             search_end, u64 hint_block,
921                             struct btrfs_key *ins, int data)
922 {
923         struct btrfs_path *path;
924         struct btrfs_key key;
925         int ret;
926         u64 hole_size = 0;
927         int slot = 0;
928         u64 last_block = 0;
929         u64 test_block;
930         u64 orig_search_start = search_start;
931         int start_found;
932         struct btrfs_leaf *l;
933         struct btrfs_root * root = orig_root->fs_info->extent_root;
934         struct btrfs_fs_info *info = root->fs_info;
935         int total_needed = num_blocks;
936         int total_found = 0;
937         int fill_prealloc = 0;
938         int level;
939         struct btrfs_block_group_cache *block_group;
940         int full_scan = 0;
941         int wrapped = 0;
942         u64 limit;
943
944         path = btrfs_alloc_path();
945         ins->flags = 0;
946         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
947
948         level = btrfs_header_level(btrfs_buffer_header(root->node));
949         if (num_blocks == 0) {
950                 fill_prealloc = 1;
951                 num_blocks = 1;
952                 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
953         }
954         if (search_end == (u64)-1)
955                 search_end = btrfs_super_total_blocks(info->disk_super);
956         if (hint_block) {
957                 block_group = btrfs_lookup_block_group(info, hint_block);
958                 block_group = btrfs_find_block_group(root, block_group,
959                                                      hint_block, data, 1);
960         } else {
961                 block_group = btrfs_find_block_group(root,
962                                                      trans->block_group, 0,
963                                                      data, 1);
964         }
965
966 check_failed:
967         if (!block_group->data)
968                 search_start = find_search_start(root, &block_group,
969                                                  search_start, total_needed);
970         else if (!full_scan)
971                 search_start = max(block_group->last_alloc, search_start);
972
973         btrfs_init_path(path);
974         ins->objectid = search_start;
975         ins->offset = 0;
976         start_found = 0;
977
978         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
979         if (ret < 0)
980                 goto error;
981
982         if (path->slots[0] > 0) {
983                 path->slots[0]--;
984         }
985
986         l = btrfs_buffer_leaf(path->nodes[0]);
987         btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
988         /*
989          * a rare case, go back one key if we hit a block group item
990          * instead of an extent item
991          */
992         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
993             key.objectid + key.offset >= search_start) {
994                 ins->objectid = key.objectid;
995                 ins->offset = key.offset - 1;
996                 btrfs_release_path(root, path);
997                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
998                 if (ret < 0)
999                         goto error;
1000
1001                 if (path->slots[0] > 0) {
1002                         path->slots[0]--;
1003                 }
1004         }
1005
1006         while (1) {
1007                 l = btrfs_buffer_leaf(path->nodes[0]);
1008                 slot = path->slots[0];
1009                 if (slot >= btrfs_header_nritems(&l->header)) {
1010                         if (fill_prealloc) {
1011                                 info->extent_tree_prealloc_nr = 0;
1012                                 total_found = 0;
1013                         }
1014                         if (start_found)
1015                                 limit = last_block +
1016                                         (block_group->key.offset >> 1);
1017                         else
1018                                 limit = search_start +
1019                                         (block_group->key.offset >> 1);
1020                         ret = btrfs_next_leaf(root, path);
1021                         if (ret == 0)
1022                                 continue;
1023                         if (ret < 0)
1024                                 goto error;
1025                         if (!start_found) {
1026                                 ins->objectid = search_start;
1027                                 ins->offset = search_end - search_start;
1028                                 start_found = 1;
1029                                 goto check_pending;
1030                         }
1031                         ins->objectid = last_block > search_start ?
1032                                         last_block : search_start;
1033                         ins->offset = search_end - ins->objectid;
1034                         goto check_pending;
1035                 }
1036
1037                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1038                 if (key.objectid >= search_start && key.objectid > last_block &&
1039                     start_found) {
1040                         if (last_block < search_start)
1041                                 last_block = search_start;
1042                         hole_size = key.objectid - last_block;
1043                         if (hole_size >= num_blocks) {
1044                                 ins->objectid = last_block;
1045                                 ins->offset = hole_size;
1046                                 goto check_pending;
1047                         }
1048                 }
1049
1050                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
1051                         goto next;
1052
1053                 start_found = 1;
1054                 last_block = key.objectid + key.offset;
1055                 if (!full_scan && last_block >= block_group->key.objectid +
1056                     block_group->key.offset) {
1057                         btrfs_release_path(root, path);
1058                         search_start = block_group->key.objectid +
1059                                 block_group->key.offset * 2;
1060                         goto new_group;
1061                 }
1062 next:
1063                 path->slots[0]++;
1064                 cond_resched();
1065         }
1066         // FIXME -ENOSPC
1067 check_pending:
1068         /* we have to make sure we didn't find an extent that has already
1069          * been allocated by the map tree or the original allocation
1070          */
1071         btrfs_release_path(root, path);
1072         BUG_ON(ins->objectid < search_start);
1073
1074         if (ins->objectid + num_blocks >= search_end) {
1075                 if (full_scan) {
1076                         ret = -ENOSPC;
1077                         goto error;
1078                 }
1079                 search_start = orig_search_start;
1080                 if (wrapped)
1081                         full_scan = 1;
1082                 else
1083                         wrapped = 1;
1084                 goto new_group;
1085         }
1086         for (test_block = ins->objectid;
1087              test_block < ins->objectid + num_blocks; test_block++) {
1088                 if (test_radix_bit(&info->pinned_radix, test_block)) {
1089                         search_start = test_block + 1;
1090                         goto new_group;
1091                 }
1092         }
1093         if (!fill_prealloc && info->extent_tree_insert_nr) {
1094                 u64 last =
1095                   info->extent_tree_insert[info->extent_tree_insert_nr - 1];
1096                 if (ins->objectid + num_blocks >
1097                     info->extent_tree_insert[0] &&
1098                     ins->objectid <= last) {
1099                         search_start = last + 1;
1100                         WARN_ON(!full_scan);
1101                         goto new_group;
1102                 }
1103         }
1104         if (!fill_prealloc && info->extent_tree_prealloc_nr) {
1105                 u64 first =
1106                   info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
1107                 if (ins->objectid + num_blocks > first &&
1108                     ins->objectid <= info->extent_tree_prealloc[0]) {
1109                         search_start = info->extent_tree_prealloc[0] + 1;
1110                         WARN_ON(!full_scan);
1111                         goto new_group;
1112                 }
1113         }
1114         if (fill_prealloc) {
1115                 int nr;
1116                 test_block = ins->objectid;
1117                 if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
1118                     leaf_range(root)) {
1119                         total_found = 0;
1120                         info->extent_tree_prealloc_nr = total_found;
1121                 }
1122                 while(test_block < ins->objectid + ins->offset &&
1123                       total_found < total_needed) {
1124                         nr = total_needed - total_found - 1;
1125                         BUG_ON(nr < 0);
1126                         info->extent_tree_prealloc[nr] = test_block;
1127                         total_found++;
1128                         test_block++;
1129                 }
1130                 if (total_found < total_needed) {
1131                         search_start = test_block;
1132                         goto new_group;
1133                 }
1134                 info->extent_tree_prealloc_nr = total_found;
1135         }
1136         if (!data) {
1137                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1138                 if (block_group) {
1139                         if (fill_prealloc)
1140                                 block_group->last_prealloc =
1141                                      info->extent_tree_prealloc[total_needed-1];
1142                         else
1143                                 trans->block_group = block_group;
1144                 }
1145         }
1146         ins->offset = num_blocks;
1147         btrfs_free_path(path);
1148         return 0;
1149
1150 new_group:
1151         if (search_start + num_blocks >= search_end) {
1152                 search_start = orig_search_start;
1153                 if (full_scan) {
1154                         ret = -ENOSPC;
1155                         goto error;
1156                 }
1157                 if (wrapped)
1158                         full_scan = 1;
1159                 else
1160                         wrapped = 1;
1161         }
1162         block_group = btrfs_lookup_block_group(info, search_start);
1163         cond_resched();
1164         if (!full_scan)
1165                 block_group = btrfs_find_block_group(root, block_group,
1166                                                      search_start, data, 0);
1167         goto check_failed;
1168
1169 error:
1170         btrfs_release_path(root, path);
1171         btrfs_free_path(path);
1172         return ret;
1173 }
1174 /*
1175  * finds a free extent and does all the dirty work required for allocation
1176  * returns the key for the extent through ins, and a tree buffer for
1177  * the first block of the extent through buf.
1178  *
1179  * returns 0 if everything worked, non-zero otherwise.
1180  */
1181 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1182                        struct btrfs_root *root, u64 owner,
1183                        u64 num_blocks, u64 hint_block,
1184                        u64 search_end, struct btrfs_key *ins, int data)
1185 {
1186         int ret;
1187         int pending_ret;
1188         u64 super_blocks_used;
1189         u64 search_start = 0;
1190         struct btrfs_fs_info *info = root->fs_info;
1191         struct btrfs_root *extent_root = info->extent_root;
1192         struct btrfs_extent_item extent_item;
1193         struct btrfs_key prealloc_key;
1194
1195         btrfs_set_extent_refs(&extent_item, 1);
1196         btrfs_set_extent_owner(&extent_item, owner);
1197
1198         if (root == extent_root) {
1199                 int nr;
1200                 BUG_ON(info->extent_tree_prealloc_nr == 0);
1201                 BUG_ON(num_blocks != 1);
1202                 ins->offset = 1;
1203                 info->extent_tree_prealloc_nr--;
1204                 nr = info->extent_tree_prealloc_nr;
1205                 ins->objectid = info->extent_tree_prealloc[nr];
1206                 info->extent_tree_insert[info->extent_tree_insert_nr++] =
1207                         ins->objectid;
1208                 ret = update_block_group(trans, root,
1209                                          ins->objectid, ins->offset, 1, 0, 0);
1210                 BUG_ON(ret);
1211                 return 0;
1212         }
1213
1214         /*
1215          * if we're doing a data allocation, preallocate room in the
1216          * extent tree first.  This way the extent tree blocks end up
1217          * in the correct block group.
1218          */
1219         if (data) {
1220                 ret = find_free_extent(trans, root, 0, 0,
1221                                        search_end, 0, &prealloc_key, 0);
1222                 if (ret) {
1223                         return ret;
1224                 }
1225                 if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
1226                         int nr = info->extent_tree_prealloc_nr;
1227                         search_end = info->extent_tree_prealloc[nr - 1] - 1;
1228                 } else {
1229                         search_start = info->extent_tree_prealloc[0] + 1;
1230                 }
1231         }
1232         if (hint_block < search_start)
1233                 hint_block = search_start;
1234         /* do the real allocation */
1235         ret = find_free_extent(trans, root, num_blocks, search_start,
1236                                search_end, hint_block, ins, data);
1237         if (ret) {
1238                 return ret;
1239         }
1240
1241         /*
1242          * if we're doing a metadata allocation, preallocate space in the
1243          * extent tree second.  This way, we don't create a tiny hole
1244          * in the allocation map between any unused preallocation blocks
1245          * and the metadata block we're actually allocating.  On disk,
1246          * it'll go:
1247          * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1248          * The unused prealloc will get reused the next time around.
1249          */
1250         if (!data) {
1251                 if (ins->objectid + ins->offset >= search_end)
1252                         search_end = ins->objectid - 1;
1253                 else
1254                         search_start = ins->objectid + ins->offset;
1255
1256                 if (hint_block < search_start)
1257                         hint_block = search_start;
1258
1259                 ret = find_free_extent(trans, root, 0, search_start,
1260                                        search_end, hint_block,
1261                                        &prealloc_key, 0);
1262                 if (ret) {
1263                         return ret;
1264                 }
1265         }
1266
1267         super_blocks_used = btrfs_super_blocks_used(info->disk_super);
1268         btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
1269                                     num_blocks);
1270         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1271                                 sizeof(extent_item));
1272
1273         finish_current_insert(trans, extent_root);
1274         pending_ret = del_pending_extents(trans, extent_root);
1275         if (ret) {
1276                 return ret;
1277         }
1278         if (pending_ret) {
1279                 return pending_ret;
1280         }
1281         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1282                                  data);
1283         BUG_ON(ret);
1284         return 0;
1285 }
1286
1287 /*
1288  * helper function to allocate a block for a given tree
1289  * returns the tree buffer or NULL.
1290  */
1291 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1292                                            struct btrfs_root *root, u64 hint)
1293 {
1294         struct btrfs_key ins;
1295         int ret;
1296         struct buffer_head *buf;
1297
1298         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1299                                  1, hint, (unsigned long)-1, &ins, 0);
1300         if (ret) {
1301                 BUG();
1302                 return NULL;
1303         }
1304         BUG_ON(ret);
1305         buf = btrfs_find_create_tree_block(root, ins.objectid);
1306         set_buffer_uptodate(buf);
1307         set_buffer_checked(buf);
1308         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1309         return buf;
1310 }
1311
1312 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1313                          struct btrfs_root *root, struct buffer_head *cur)
1314 {
1315         struct btrfs_disk_key *key;
1316         struct btrfs_leaf *leaf;
1317         struct btrfs_file_extent_item *fi;
1318         int i;
1319         int nritems;
1320         int ret;
1321
1322         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1323         leaf = btrfs_buffer_leaf(cur);
1324         nritems = btrfs_header_nritems(&leaf->header);
1325         for (i = 0; i < nritems; i++) {
1326                 u64 disk_blocknr;
1327                 key = &leaf->items[i].key;
1328                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1329                         continue;
1330                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1331                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1332                         continue;
1333                 /*
1334                  * FIXME make sure to insert a trans record that
1335                  * repeats the snapshot del on crash
1336                  */
1337                 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
1338                 if (disk_blocknr == 0)
1339                         continue;
1340                 ret = btrfs_free_extent(trans, root, disk_blocknr,
1341                                         btrfs_file_extent_disk_num_blocks(fi),
1342                                         0);
1343                 BUG_ON(ret);
1344         }
1345         return 0;
1346 }
1347
1348 /*
1349  * helper function for drop_snapshot, this walks down the tree dropping ref
1350  * counts as it goes.
1351  */
1352 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1353                           *root, struct btrfs_path *path, int *level)
1354 {
1355         struct buffer_head *next;
1356         struct buffer_head *cur;
1357         u64 blocknr;
1358         int ret;
1359         u32 refs;
1360
1361         WARN_ON(*level < 0);
1362         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1363         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1364                                1, &refs);
1365         BUG_ON(ret);
1366         if (refs > 1)
1367                 goto out;
1368         /*
1369          * walk down to the last node level and free all the leaves
1370          */
1371         while(*level >= 0) {
1372                 WARN_ON(*level < 0);
1373                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1374                 cur = path->nodes[*level];
1375                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1376                         WARN_ON(1);
1377                 if (path->slots[*level] >=
1378                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1379                         break;
1380                 if (*level == 0) {
1381                         ret = drop_leaf_ref(trans, root, cur);
1382                         BUG_ON(ret);
1383                         break;
1384                 }
1385                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1386                                               path->slots[*level]);
1387                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1388                 BUG_ON(ret);
1389                 if (refs != 1) {
1390                         path->slots[*level]++;
1391                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1392                         BUG_ON(ret);
1393                         continue;
1394                 }
1395                 next = read_tree_block(root, blocknr);
1396                 WARN_ON(*level <= 0);
1397                 if (path->nodes[*level-1])
1398                         btrfs_block_release(root, path->nodes[*level-1]);
1399                 path->nodes[*level-1] = next;
1400                 *level = btrfs_header_level(btrfs_buffer_header(next));
1401                 path->slots[*level] = 0;
1402         }
1403 out:
1404         WARN_ON(*level < 0);
1405         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1406         ret = btrfs_free_extent(trans, root,
1407                                 bh_blocknr(path->nodes[*level]), 1, 1);
1408         btrfs_block_release(root, path->nodes[*level]);
1409         path->nodes[*level] = NULL;
1410         *level += 1;
1411         BUG_ON(ret);
1412         return 0;
1413 }
1414
1415 /*
1416  * helper for dropping snapshots.  This walks back up the tree in the path
1417  * to find the first node higher up where we haven't yet gone through
1418  * all the slots
1419  */
1420 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1421                         *root, struct btrfs_path *path, int *level)
1422 {
1423         int i;
1424         int slot;
1425         int ret;
1426         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1427                 slot = path->slots[i];
1428                 if (slot < btrfs_header_nritems(
1429                     btrfs_buffer_header(path->nodes[i])) - 1) {
1430                         path->slots[i]++;
1431                         *level = i;
1432                         return 0;
1433                 } else {
1434                         ret = btrfs_free_extent(trans, root,
1435                                                 bh_blocknr(path->nodes[*level]),
1436                                                 1, 1);
1437                         BUG_ON(ret);
1438                         btrfs_block_release(root, path->nodes[*level]);
1439                         path->nodes[*level] = NULL;
1440                         *level = i + 1;
1441                 }
1442         }
1443         return 1;
1444 }
1445
1446 /*
1447  * drop the reference count on the tree rooted at 'snap'.  This traverses
1448  * the tree freeing any blocks that have a ref count of zero after being
1449  * decremented.
1450  */
1451 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1452                         *root, struct buffer_head *snap)
1453 {
1454         int ret = 0;
1455         int wret;
1456         int level;
1457         struct btrfs_path *path;
1458         int i;
1459         int orig_level;
1460
1461         path = btrfs_alloc_path();
1462         BUG_ON(!path);
1463
1464         level = btrfs_header_level(btrfs_buffer_header(snap));
1465         orig_level = level;
1466         path->nodes[level] = snap;
1467         path->slots[level] = 0;
1468         while(1) {
1469                 wret = walk_down_tree(trans, root, path, &level);
1470                 if (wret > 0)
1471                         break;
1472                 if (wret < 0)
1473                         ret = wret;
1474
1475                 wret = walk_up_tree(trans, root, path, &level);
1476                 if (wret > 0)
1477                         break;
1478                 if (wret < 0)
1479                         ret = wret;
1480                 btrfs_btree_balance_dirty(root);
1481         }
1482         for (i = 0; i <= orig_level; i++) {
1483                 if (path->nodes[i]) {
1484                         btrfs_block_release(root, path->nodes[i]);
1485                 }
1486         }
1487         btrfs_free_path(path);
1488         return ret;
1489 }
1490
1491 static int free_block_group_radix(struct radix_tree_root *radix)
1492 {
1493         int ret;
1494         struct btrfs_block_group_cache *cache[8];
1495         int i;
1496
1497         while(1) {
1498                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1499                                              ARRAY_SIZE(cache));
1500                 if (!ret)
1501                         break;
1502                 for (i = 0; i < ret; i++) {
1503                         radix_tree_delete(radix, cache[i]->key.objectid +
1504                                           cache[i]->key.offset - 1);
1505                         kfree(cache[i]);
1506                 }
1507         }
1508         return 0;
1509 }
1510
1511 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1512 {
1513         int ret;
1514         int ret2;
1515         unsigned long gang[16];
1516         int i;
1517
1518         ret = free_block_group_radix(&info->block_group_radix);
1519         ret2 = free_block_group_radix(&info->block_group_data_radix);
1520         if (ret)
1521                 return ret;
1522         if (ret2)
1523                 return ret2;
1524
1525         while(1) {
1526                 ret = find_first_radix_bit(&info->extent_map_radix,
1527                                            gang, 0, ARRAY_SIZE(gang));
1528                 if (!ret)
1529                         break;
1530                 for (i = 0; i < ret; i++) {
1531                         clear_radix_bit(&info->extent_map_radix, gang[i]);
1532                 }
1533         }
1534         return 0;
1535 }
1536
1537 int btrfs_read_block_groups(struct btrfs_root *root)
1538 {
1539         struct btrfs_path *path;
1540         int ret;
1541         int err = 0;
1542         struct btrfs_block_group_item *bi;
1543         struct btrfs_block_group_cache *cache;
1544         struct btrfs_fs_info *info = root->fs_info;
1545         struct radix_tree_root *radix;
1546         struct btrfs_key key;
1547         struct btrfs_key found_key;
1548         struct btrfs_leaf *leaf;
1549         u64 group_size_blocks;
1550         u64 used;
1551
1552         group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
1553                 root->fs_info->sb->s_blocksize_bits;
1554         root = info->extent_root;
1555         key.objectid = 0;
1556         key.offset = group_size_blocks;
1557         key.flags = 0;
1558         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1559
1560         path = btrfs_alloc_path();
1561         if (!path)
1562                 return -ENOMEM;
1563
1564         while(1) {
1565                 ret = btrfs_search_slot(NULL, info->extent_root,
1566                                         &key, path, 0, 0);
1567                 if (ret != 0) {
1568                         err = ret;
1569                         break;
1570                 }
1571                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1572                 btrfs_disk_key_to_cpu(&found_key,
1573                                       &leaf->items[path->slots[0]].key);
1574                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1575                 if (!cache) {
1576                         err = -1;
1577                         break;
1578                 }
1579
1580                 bi = btrfs_item_ptr(leaf, path->slots[0],
1581                                     struct btrfs_block_group_item);
1582                 if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
1583                         radix = &info->block_group_data_radix;
1584                         cache->data = 1;
1585                 } else {
1586                         radix = &info->block_group_radix;
1587                         cache->data = 0;
1588                 }
1589
1590                 memcpy(&cache->item, bi, sizeof(*bi));
1591                 memcpy(&cache->key, &found_key, sizeof(found_key));
1592                 cache->last_alloc = cache->key.objectid;
1593                 cache->first_free = cache->key.objectid;
1594                 cache->last_prealloc = cache->key.objectid;
1595                 cache->pinned = 0;
1596                 cache->cached = 0;
1597
1598                 cache->radix = radix;
1599
1600                 key.objectid = found_key.objectid + found_key.offset;
1601                 btrfs_release_path(root, path);
1602                 ret = radix_tree_insert(radix, found_key.objectid +
1603                                         found_key.offset - 1,
1604                                         (void *)cache);
1605                 BUG_ON(ret);
1606                 used = btrfs_block_group_used(bi);
1607                 if (used < div_factor(key.offset, 8)) {
1608                         radix_tree_tag_set(radix, found_key.objectid +
1609                                            found_key.offset - 1,
1610                                            BTRFS_BLOCK_GROUP_AVAIL);
1611                 }
1612                 if (key.objectid >=
1613                     btrfs_super_total_blocks(info->disk_super))
1614                         break;
1615         }
1616
1617         btrfs_free_path(path);
1618         return 0;
1619 }