Btrfs: add GPLv2
[safe/jmp/linux-2.6] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/module.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
25 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26                             *orig_root, u64 num_blocks, u64 search_start,
27                             u64 search_end, u64 hint_block,
28                             struct btrfs_key *ins, int data);
29 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
30                                  btrfs_root *extent_root);
31 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
32                                btrfs_root *extent_root);
33
34 static void reada_extent_leaves(struct btrfs_root *root,
35                                 struct btrfs_path *path, u64 limit)
36 {
37         struct btrfs_node *node;
38         int i;
39         int nritems;
40         u64 item_objectid;
41         u64 blocknr;
42         int slot;
43         int ret;
44
45         if (!path->nodes[1])
46                 return;
47         node = btrfs_buffer_node(path->nodes[1]);
48         slot = path->slots[1] + 1;
49         nritems = btrfs_header_nritems(&node->header);
50         for (i = slot; i < nritems && i < slot + 8; i++) {
51                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
52                 if (item_objectid > limit)
53                         break;
54                 blocknr = btrfs_node_blockptr(node, i);
55                 ret = readahead_tree_block(root, blocknr);
56                 if (ret)
57                         break;
58         }
59 }
60
61 static int cache_block_group(struct btrfs_root *root,
62                              struct btrfs_block_group_cache *block_group)
63 {
64         struct btrfs_path *path;
65         int ret;
66         struct btrfs_key key;
67         struct btrfs_leaf *leaf;
68         struct radix_tree_root *extent_radix;
69         int slot;
70         u64 i;
71         u64 last = 0;
72         u64 hole_size;
73         u64 limit;
74         int found = 0;
75
76         root = root->fs_info->extent_root;
77         extent_radix = &root->fs_info->extent_map_radix;
78
79         if (block_group->cached)
80                 return 0;
81         if (block_group->data)
82                 return 0;
83         path = btrfs_alloc_path();
84         if (!path)
85                 return -ENOMEM;
86         key.objectid = block_group->key.objectid;
87         key.flags = 0;
88         key.offset = 0;
89         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
90         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
91         if (ret < 0)
92                 return ret;
93         if (ret && path->slots[0] > 0)
94                 path->slots[0]--;
95         limit = block_group->key.objectid + block_group->key.offset;
96         reada_extent_leaves(root, path, limit);
97         while(1) {
98                 leaf = btrfs_buffer_leaf(path->nodes[0]);
99                 slot = path->slots[0];
100                 if (slot >= btrfs_header_nritems(&leaf->header)) {
101                         reada_extent_leaves(root, path, limit);
102                         ret = btrfs_next_leaf(root, path);
103                         if (ret == 0) {
104                                 continue;
105                         } else {
106                                 if (found) {
107                                         hole_size = block_group->key.objectid +
108                                                 block_group->key.offset - last;
109                                 } else {
110                                         last = block_group->key.objectid;
111                                         hole_size = block_group->key.offset;
112                                 }
113                                 for (i = 0; i < hole_size; i++) {
114                                         set_radix_bit(extent_radix,
115                                                       last + i);
116                                 }
117                                 break;
118                         }
119                 }
120                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
121                 if (key.objectid >= block_group->key.objectid +
122                     block_group->key.offset) {
123                         if (found) {
124                                 hole_size = block_group->key.objectid +
125                                         block_group->key.offset - last;
126                         } else {
127                                 last = block_group->key.objectid;
128                                 hole_size = block_group->key.offset;
129                         }
130                         for (i = 0; i < hole_size; i++) {
131                                 set_radix_bit(extent_radix, last + i);
132                         }
133                         break;
134                 }
135                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
136                         if (!found) {
137                                 last = key.objectid + key.offset;
138                                 found = 1;
139                         } else {
140                                 hole_size = key.objectid - last;
141                                 for (i = 0; i < hole_size; i++) {
142                                         set_radix_bit(extent_radix, last + i);
143                                 }
144                                 last = key.objectid + key.offset;
145                         }
146                 }
147                 path->slots[0]++;
148         }
149
150         block_group->cached = 1;
151         btrfs_free_path(path);
152         return 0;
153 }
154
155 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
156                                                          btrfs_fs_info *info,
157                                                          u64 blocknr)
158 {
159         struct btrfs_block_group_cache *block_group;
160         int ret;
161
162         ret = radix_tree_gang_lookup(&info->block_group_radix,
163                                      (void **)&block_group,
164                                      blocknr, 1);
165         if (ret) {
166                 if (block_group->key.objectid <= blocknr && blocknr <=
167                     block_group->key.objectid + block_group->key.offset)
168                         return block_group;
169         }
170         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
171                                      (void **)&block_group,
172                                      blocknr, 1);
173         if (ret) {
174                 if (block_group->key.objectid <= blocknr && blocknr <=
175                     block_group->key.objectid + block_group->key.offset)
176                         return block_group;
177         }
178         return NULL;
179 }
180
181 static u64 leaf_range(struct btrfs_root *root)
182 {
183         u64 size = BTRFS_LEAF_DATA_SIZE(root);
184         do_div(size, sizeof(struct btrfs_extent_item) +
185                 sizeof(struct btrfs_item));
186         return size;
187 }
188
189 static u64 find_search_start(struct btrfs_root *root,
190                              struct btrfs_block_group_cache **cache_ret,
191                              u64 search_start, int num)
192 {
193         unsigned long gang[8];
194         int ret;
195         struct btrfs_block_group_cache *cache = *cache_ret;
196         u64 last = max(search_start, cache->key.objectid);
197
198         if (cache->data)
199                 goto out;
200         if (num > 1) {
201                 last = max(last, cache->last_prealloc);
202         }
203 again:
204         cache_block_group(root, cache);
205         while(1) {
206                 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
207                                            gang, last, ARRAY_SIZE(gang));
208                 if (!ret)
209                         goto out;
210                 last = gang[ret-1] + 1;
211                 if (num > 1) {
212                         if (ret != ARRAY_SIZE(gang)) {
213                                 goto new_group;
214                         }
215                         if (gang[ret-1] - gang[0] > leaf_range(root)) {
216                                 continue;
217                         }
218                 }
219                 if (gang[0] >= cache->key.objectid + cache->key.offset) {
220                         goto new_group;
221                 }
222                 return gang[0];
223         }
224 out:
225         return max(cache->last_alloc, search_start);
226
227 new_group:
228         cache = btrfs_lookup_block_group(root->fs_info,
229                                          last + cache->key.offset - 1);
230         if (!cache) {
231                 return max((*cache_ret)->last_alloc, search_start);
232         }
233         cache = btrfs_find_block_group(root, cache,
234                                        last + cache->key.offset - 1, 0, 0);
235         *cache_ret = cache;
236         goto again;
237 }
238
239 static u64 div_factor(u64 num, int factor)
240 {
241         num *= factor;
242         do_div(num, 10);
243         return num;
244 }
245
246 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
247                                                  struct btrfs_block_group_cache
248                                                  *hint, u64 search_start,
249                                                  int data, int owner)
250 {
251         struct btrfs_block_group_cache *cache[8];
252         struct btrfs_block_group_cache *found_group = NULL;
253         struct btrfs_fs_info *info = root->fs_info;
254         struct radix_tree_root *radix;
255         struct radix_tree_root *swap_radix;
256         u64 used;
257         u64 last = 0;
258         u64 hint_last;
259         int i;
260         int ret;
261         int full_search = 0;
262         int factor = 8;
263         int data_swap = 0;
264
265         if (!owner)
266                 factor = 5;
267
268         if (data) {
269                 radix = &info->block_group_data_radix;
270                 swap_radix = &info->block_group_radix;
271         } else {
272                 radix = &info->block_group_radix;
273                 swap_radix = &info->block_group_data_radix;
274         }
275
276         if (search_start) {
277                 struct btrfs_block_group_cache *shint;
278                 shint = btrfs_lookup_block_group(info, search_start);
279                 if (shint->data == data) {
280                         used = btrfs_block_group_used(&shint->item);
281                         if (used + shint->pinned <
282                             div_factor(shint->key.offset, factor)) {
283                                 return shint;
284                         }
285                 }
286         }
287         if (hint && hint->data == data) {
288                 used = btrfs_block_group_used(&hint->item);
289                 if (used + hint->pinned <
290                     div_factor(hint->key.offset, factor)) {
291                         return hint;
292                 }
293                 if (used >= div_factor(hint->key.offset, 8)) {
294                         radix_tree_tag_clear(radix,
295                                              hint->key.objectid +
296                                              hint->key.offset - 1,
297                                              BTRFS_BLOCK_GROUP_AVAIL);
298                 }
299                 last = hint->key.offset * 3;
300                 if (hint->key.objectid >= last)
301                         last = max(search_start + hint->key.offset - 1,
302                                    hint->key.objectid - last);
303                 else
304                         last = hint->key.objectid + hint->key.offset;
305                 hint_last = last;
306         } else {
307                 if (hint)
308                         hint_last = max(hint->key.objectid, search_start);
309                 else
310                         hint_last = search_start;
311
312                 last = hint_last;
313         }
314         while(1) {
315                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
316                                                  last, ARRAY_SIZE(cache),
317                                                  BTRFS_BLOCK_GROUP_AVAIL);
318                 if (!ret)
319                         break;
320                 for (i = 0; i < ret; i++) {
321                         last = cache[i]->key.objectid +
322                                 cache[i]->key.offset;
323                         used = btrfs_block_group_used(&cache[i]->item);
324                         if (used + cache[i]->pinned <
325                             div_factor(cache[i]->key.offset, factor)) {
326                                 found_group = cache[i];
327                                 goto found;
328                         }
329                         if (used >= div_factor(cache[i]->key.offset, 8)) {
330                                 radix_tree_tag_clear(radix,
331                                                      cache[i]->key.objectid +
332                                                      cache[i]->key.offset - 1,
333                                                      BTRFS_BLOCK_GROUP_AVAIL);
334                         }
335                 }
336                 cond_resched();
337         }
338         last = hint_last;
339 again:
340         while(1) {
341                 ret = radix_tree_gang_lookup(radix, (void **)cache,
342                                              last, ARRAY_SIZE(cache));
343                 if (!ret)
344                         break;
345                 for (i = 0; i < ret; i++) {
346                         last = cache[i]->key.objectid +
347                                 cache[i]->key.offset;
348                         used = btrfs_block_group_used(&cache[i]->item);
349                         if (used + cache[i]->pinned < cache[i]->key.offset) {
350                                 found_group = cache[i];
351                                 goto found;
352                         }
353                         if (used >= cache[i]->key.offset) {
354                                 radix_tree_tag_clear(radix,
355                                                      cache[i]->key.objectid +
356                                                      cache[i]->key.offset - 1,
357                                                      BTRFS_BLOCK_GROUP_AVAIL);
358                         }
359                 }
360                 cond_resched();
361         }
362         if (!full_search) {
363                 last = search_start;
364                 full_search = 1;
365                 goto again;
366         }
367         if (!data_swap) {
368                 struct radix_tree_root *tmp = radix;
369                 data_swap = 1;
370                 radix = swap_radix;
371                 swap_radix = tmp;
372                 last = search_start;
373                 goto again;
374         }
375         if (!found_group) {
376                 ret = radix_tree_gang_lookup(radix,
377                                              (void **)&found_group, 0, 1);
378                 if (ret == 0) {
379                         ret = radix_tree_gang_lookup(swap_radix,
380                                                      (void **)&found_group,
381                                                      0, 1);
382                 }
383                 BUG_ON(ret != 1);
384         }
385 found:
386         return found_group;
387 }
388
389 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
390                                 struct btrfs_root *root,
391                                 u64 blocknr, u64 num_blocks)
392 {
393         struct btrfs_path *path;
394         int ret;
395         struct btrfs_key key;
396         struct btrfs_leaf *l;
397         struct btrfs_extent_item *item;
398         struct btrfs_key ins;
399         u32 refs;
400
401         find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0,
402                          &ins, 0);
403         path = btrfs_alloc_path();
404         BUG_ON(!path);
405         btrfs_init_path(path);
406         key.objectid = blocknr;
407         key.flags = 0;
408         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
409         key.offset = num_blocks;
410         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
411                                 0, 1);
412         if (ret != 0) {
413                 BUG();
414         }
415         BUG_ON(ret != 0);
416         l = btrfs_buffer_leaf(path->nodes[0]);
417         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
418         refs = btrfs_extent_refs(item);
419         btrfs_set_extent_refs(item, refs + 1);
420         btrfs_mark_buffer_dirty(path->nodes[0]);
421
422         btrfs_release_path(root->fs_info->extent_root, path);
423         btrfs_free_path(path);
424         finish_current_insert(trans, root->fs_info->extent_root);
425         del_pending_extents(trans, root->fs_info->extent_root);
426         return 0;
427 }
428
429 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
430                              struct btrfs_root *root, u64 blocknr,
431                              u64 num_blocks, u32 *refs)
432 {
433         struct btrfs_path *path;
434         int ret;
435         struct btrfs_key key;
436         struct btrfs_leaf *l;
437         struct btrfs_extent_item *item;
438
439         path = btrfs_alloc_path();
440         btrfs_init_path(path);
441         key.objectid = blocknr;
442         key.offset = num_blocks;
443         key.flags = 0;
444         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
445         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
446                                 0, 0);
447         if (ret != 0)
448                 BUG();
449         l = btrfs_buffer_leaf(path->nodes[0]);
450         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
451         *refs = btrfs_extent_refs(item);
452         btrfs_release_path(root->fs_info->extent_root, path);
453         btrfs_free_path(path);
454         return 0;
455 }
456
457 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
458                        struct btrfs_root *root)
459 {
460         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
461 }
462
463 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
464                   struct buffer_head *buf)
465 {
466         u64 blocknr;
467         struct btrfs_node *buf_node;
468         struct btrfs_leaf *buf_leaf;
469         struct btrfs_disk_key *key;
470         struct btrfs_file_extent_item *fi;
471         int i;
472         int leaf;
473         int ret;
474
475         if (!root->ref_cows)
476                 return 0;
477         buf_node = btrfs_buffer_node(buf);
478         leaf = btrfs_is_leaf(buf_node);
479         buf_leaf = btrfs_buffer_leaf(buf);
480         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
481                 if (leaf) {
482                         u64 disk_blocknr;
483                         key = &buf_leaf->items[i].key;
484                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
485                                 continue;
486                         fi = btrfs_item_ptr(buf_leaf, i,
487                                             struct btrfs_file_extent_item);
488                         if (btrfs_file_extent_type(fi) ==
489                             BTRFS_FILE_EXTENT_INLINE)
490                                 continue;
491                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
492                         if (disk_blocknr == 0)
493                                 continue;
494                         ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
495                                     btrfs_file_extent_disk_num_blocks(fi));
496                         BUG_ON(ret);
497                 } else {
498                         blocknr = btrfs_node_blockptr(buf_node, i);
499                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
500                         BUG_ON(ret);
501                 }
502         }
503         return 0;
504 }
505
506 static int write_one_cache_group(struct btrfs_trans_handle *trans,
507                                  struct btrfs_root *root,
508                                  struct btrfs_path *path,
509                                  struct btrfs_block_group_cache *cache)
510 {
511         int ret;
512         int pending_ret;
513         struct btrfs_root *extent_root = root->fs_info->extent_root;
514         struct btrfs_block_group_item *bi;
515         struct btrfs_key ins;
516
517         find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0);
518         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
519         BUG_ON(ret);
520         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
521                             struct btrfs_block_group_item);
522         memcpy(bi, &cache->item, sizeof(*bi));
523         mark_buffer_dirty(path->nodes[0]);
524         btrfs_release_path(extent_root, path);
525
526         finish_current_insert(trans, extent_root);
527         pending_ret = del_pending_extents(trans, extent_root);
528         if (ret)
529                 return ret;
530         if (pending_ret)
531                 return pending_ret;
532         if (cache->data)
533                 cache->last_alloc = cache->first_free;
534         return 0;
535
536 }
537
538 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
539                                    struct btrfs_root *root,
540                                    struct radix_tree_root *radix)
541 {
542         struct btrfs_block_group_cache *cache[8];
543         int ret;
544         int err = 0;
545         int werr = 0;
546         int i;
547         struct btrfs_path *path;
548
549         path = btrfs_alloc_path();
550         if (!path)
551                 return -ENOMEM;
552
553         while(1) {
554                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
555                                                  0, ARRAY_SIZE(cache),
556                                                  BTRFS_BLOCK_GROUP_DIRTY);
557                 if (!ret)
558                         break;
559                 for (i = 0; i < ret; i++) {
560                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
561                                              cache[i]->key.offset - 1,
562                                              BTRFS_BLOCK_GROUP_DIRTY);
563                         err = write_one_cache_group(trans, root,
564                                                     path, cache[i]);
565                         if (err)
566                                 werr = err;
567                 }
568         }
569         btrfs_free_path(path);
570         return werr;
571 }
572
573 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
574                                    struct btrfs_root *root)
575 {
576         int ret;
577         int ret2;
578         ret = write_dirty_block_radix(trans, root,
579                                       &root->fs_info->block_group_radix);
580         ret2 = write_dirty_block_radix(trans, root,
581                                       &root->fs_info->block_group_data_radix);
582         if (ret)
583                 return ret;
584         if (ret2)
585                 return ret2;
586         return 0;
587 }
588
589 static int update_block_group(struct btrfs_trans_handle *trans,
590                               struct btrfs_root *root,
591                               u64 blocknr, u64 num, int alloc, int mark_free,
592                               int data)
593 {
594         struct btrfs_block_group_cache *cache;
595         struct btrfs_fs_info *info = root->fs_info;
596         u64 total = num;
597         u64 old_val;
598         u64 block_in_group;
599         u64 i;
600         int ret;
601
602         while(total) {
603                 cache = btrfs_lookup_block_group(info, blocknr);
604                 if (!cache) {
605                         return -1;
606                 }
607                 block_in_group = blocknr - cache->key.objectid;
608                 WARN_ON(block_in_group > cache->key.offset);
609                 radix_tree_tag_set(cache->radix, cache->key.objectid +
610                                    cache->key.offset - 1,
611                                    BTRFS_BLOCK_GROUP_DIRTY);
612
613                 old_val = btrfs_block_group_used(&cache->item);
614                 num = min(total, cache->key.offset - block_in_group);
615                 if (alloc) {
616                         if (blocknr > cache->last_alloc)
617                                 cache->last_alloc = blocknr;
618                         if (!cache->data) {
619                                 for (i = 0; i < num; i++) {
620                                         clear_radix_bit(&info->extent_map_radix,
621                                                         blocknr + i);
622                                 }
623                         }
624                         if (cache->data != data &&
625                             old_val < (cache->key.offset >> 1)) {
626                                 cache->data = data;
627                                 radix_tree_delete(cache->radix,
628                                                   cache->key.objectid +
629                                                   cache->key.offset - 1);
630
631                                 if (data) {
632                                         cache->radix =
633                                                 &info->block_group_data_radix;
634                                         cache->item.flags |=
635                                                 BTRFS_BLOCK_GROUP_DATA;
636                                 } else {
637                                         cache->radix = &info->block_group_radix;
638                                         cache->item.flags &=
639                                                 ~BTRFS_BLOCK_GROUP_DATA;
640                                 }
641                                 ret = radix_tree_insert(cache->radix,
642                                                         cache->key.objectid +
643                                                         cache->key.offset - 1,
644                                                         (void *)cache);
645                         }
646                         old_val += num;
647                 } else {
648                         old_val -= num;
649                         if (blocknr < cache->first_free)
650                                 cache->first_free = blocknr;
651                         if (!cache->data && mark_free) {
652                                 for (i = 0; i < num; i++) {
653                                         set_radix_bit(&info->extent_map_radix,
654                                                       blocknr + i);
655                                 }
656                         }
657                         if (old_val < (cache->key.offset >> 1) &&
658                             old_val + num >= (cache->key.offset >> 1)) {
659                                 radix_tree_tag_set(cache->radix,
660                                                    cache->key.objectid +
661                                                    cache->key.offset - 1,
662                                                    BTRFS_BLOCK_GROUP_AVAIL);
663                         }
664                 }
665                 btrfs_set_block_group_used(&cache->item, old_val);
666                 total -= num;
667                 blocknr += num;
668         }
669         return 0;
670 }
671
672 static int try_remove_page(struct address_space *mapping, unsigned long index)
673 {
674         int ret;
675         ret = invalidate_mapping_pages(mapping, index, index);
676         return ret;
677 }
678
679 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
680                                btrfs_root *root)
681 {
682         unsigned long gang[8];
683         struct inode *btree_inode = root->fs_info->btree_inode;
684         struct btrfs_block_group_cache *block_group;
685         u64 first = 0;
686         int ret;
687         int i;
688         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
689         struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
690
691         while(1) {
692                 ret = find_first_radix_bit(pinned_radix, gang, 0,
693                                            ARRAY_SIZE(gang));
694                 if (!ret)
695                         break;
696                 if (!first)
697                         first = gang[0];
698                 for (i = 0; i < ret; i++) {
699                         clear_radix_bit(pinned_radix, gang[i]);
700                         block_group = btrfs_lookup_block_group(root->fs_info,
701                                                                gang[i]);
702                         if (block_group) {
703                                 WARN_ON(block_group->pinned == 0);
704                                 block_group->pinned--;
705                                 if (gang[i] < block_group->last_alloc)
706                                         block_group->last_alloc = gang[i];
707                                 if (gang[i] < block_group->last_prealloc)
708                                         block_group->last_prealloc = gang[i];
709                                 if (!block_group->data)
710                                         set_radix_bit(extent_radix, gang[i]);
711                         }
712                         try_remove_page(btree_inode->i_mapping,
713                                         gang[i] << (PAGE_CACHE_SHIFT -
714                                                     btree_inode->i_blkbits));
715                 }
716         }
717         return 0;
718 }
719
720 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
721                                  btrfs_root *extent_root)
722 {
723         struct btrfs_key ins;
724         struct btrfs_extent_item extent_item;
725         int i;
726         int ret;
727         u64 super_blocks_used;
728         struct btrfs_fs_info *info = extent_root->fs_info;
729
730         btrfs_set_extent_refs(&extent_item, 1);
731         ins.offset = 1;
732         ins.flags = 0;
733         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
734         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
735
736         for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
737                 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
738                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
739                 btrfs_set_super_blocks_used(info->disk_super,
740                                             super_blocks_used + 1);
741                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
742                                         sizeof(extent_item));
743                 BUG_ON(ret);
744         }
745         extent_root->fs_info->extent_tree_insert_nr = 0;
746         extent_root->fs_info->extent_tree_prealloc_nr = 0;
747         return 0;
748 }
749
750 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
751 {
752         int err;
753         struct btrfs_header *header;
754         struct buffer_head *bh;
755
756         if (!pending) {
757                 bh = btrfs_find_tree_block(root, blocknr);
758                 if (bh) {
759                         if (buffer_uptodate(bh)) {
760                                 u64 transid =
761                                     root->fs_info->running_transaction->transid;
762                                 header = btrfs_buffer_header(bh);
763                                 if (btrfs_header_generation(header) ==
764                                     transid) {
765                                         btrfs_block_release(root, bh);
766                                         return 0;
767                                 }
768                         }
769                         btrfs_block_release(root, bh);
770                 }
771                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
772                 if (!err) {
773                         struct btrfs_block_group_cache *cache;
774                         cache = btrfs_lookup_block_group(root->fs_info,
775                                                          blocknr);
776                         if (cache)
777                                 cache->pinned++;
778                 }
779         } else {
780                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
781         }
782         BUG_ON(err < 0);
783         return 0;
784 }
785
786 /*
787  * remove an extent from the root, returns 0 on success
788  */
789 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
790                          *root, u64 blocknr, u64 num_blocks, int pin,
791                          int mark_free)
792 {
793         struct btrfs_path *path;
794         struct btrfs_key key;
795         struct btrfs_fs_info *info = root->fs_info;
796         struct btrfs_root *extent_root = info->extent_root;
797         int ret;
798         struct btrfs_extent_item *ei;
799         struct btrfs_key ins;
800         u32 refs;
801
802         key.objectid = blocknr;
803         key.flags = 0;
804         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
805         key.offset = num_blocks;
806
807         find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0);
808         path = btrfs_alloc_path();
809         BUG_ON(!path);
810         btrfs_init_path(path);
811
812         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
813         if (ret) {
814                 BUG();
815         }
816         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
817                             struct btrfs_extent_item);
818         BUG_ON(ei->refs == 0);
819         refs = btrfs_extent_refs(ei) - 1;
820         btrfs_set_extent_refs(ei, refs);
821         btrfs_mark_buffer_dirty(path->nodes[0]);
822         if (refs == 0) {
823                 u64 super_blocks_used;
824
825                 if (pin) {
826                         ret = pin_down_block(root, blocknr, 0);
827                         BUG_ON(ret);
828                 }
829
830                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
831                 btrfs_set_super_blocks_used(info->disk_super,
832                                             super_blocks_used - num_blocks);
833                 ret = btrfs_del_item(trans, extent_root, path);
834                 if (ret)
835                         BUG();
836                 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
837                                          mark_free, 0);
838                 BUG_ON(ret);
839         }
840         btrfs_free_path(path);
841         finish_current_insert(trans, extent_root);
842         return ret;
843 }
844
845 /*
846  * find all the blocks marked as pending in the radix tree and remove
847  * them from the extent map
848  */
849 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
850                                btrfs_root *extent_root)
851 {
852         int ret;
853         int wret;
854         int err = 0;
855         unsigned long gang[4];
856         int i;
857         struct radix_tree_root *pending_radix;
858         struct radix_tree_root *pinned_radix;
859         struct btrfs_block_group_cache *cache;
860
861         pending_radix = &extent_root->fs_info->pending_del_radix;
862         pinned_radix = &extent_root->fs_info->pinned_radix;
863
864         while(1) {
865                 ret = find_first_radix_bit(pending_radix, gang, 0,
866                                            ARRAY_SIZE(gang));
867                 if (!ret)
868                         break;
869                 for (i = 0; i < ret; i++) {
870                         wret = set_radix_bit(pinned_radix, gang[i]);
871                         if (wret == 0) {
872                                 cache =
873                                   btrfs_lookup_block_group(extent_root->fs_info,
874                                                            gang[i]);
875                                 if (cache)
876                                         cache->pinned++;
877                         }
878                         if (wret < 0) {
879                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
880                                        wret);
881                                 BUG_ON(wret < 0);
882                         }
883                         wret = clear_radix_bit(pending_radix, gang[i]);
884                         BUG_ON(wret);
885                         wret = __free_extent(trans, extent_root,
886                                              gang[i], 1, 0, 0);
887                         if (wret)
888                                 err = wret;
889                 }
890         }
891         return err;
892 }
893
894 /*
895  * remove an extent from the root, returns 0 on success
896  */
897 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
898                       *root, u64 blocknr, u64 num_blocks, int pin)
899 {
900         struct btrfs_root *extent_root = root->fs_info->extent_root;
901         int pending_ret;
902         int ret;
903
904         if (root == extent_root) {
905                 pin_down_block(root, blocknr, 1);
906                 return 0;
907         }
908         ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
909         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
910         return ret ? ret : pending_ret;
911 }
912
913 /*
914  * walks the btree of allocated extents and find a hole of a given size.
915  * The key ins is changed to record the hole:
916  * ins->objectid == block start
917  * ins->flags = BTRFS_EXTENT_ITEM_KEY
918  * ins->offset == number of blocks
919  * Any available blocks before search_start are skipped.
920  */
921 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
922                             *orig_root, u64 num_blocks, u64 search_start, u64
923                             search_end, u64 hint_block,
924                             struct btrfs_key *ins, int data)
925 {
926         struct btrfs_path *path;
927         struct btrfs_key key;
928         int ret;
929         u64 hole_size = 0;
930         int slot = 0;
931         u64 last_block = 0;
932         u64 test_block;
933         u64 orig_search_start = search_start;
934         int start_found;
935         struct btrfs_leaf *l;
936         struct btrfs_root * root = orig_root->fs_info->extent_root;
937         struct btrfs_fs_info *info = root->fs_info;
938         int total_needed = num_blocks;
939         int total_found = 0;
940         int fill_prealloc = 0;
941         int level;
942         struct btrfs_block_group_cache *block_group;
943         int full_scan = 0;
944         int wrapped = 0;
945         u64 limit;
946
947         path = btrfs_alloc_path();
948         ins->flags = 0;
949         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
950
951         level = btrfs_header_level(btrfs_buffer_header(root->node));
952         if (num_blocks == 0) {
953                 fill_prealloc = 1;
954                 num_blocks = 1;
955                 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
956         }
957         if (search_end == (u64)-1)
958                 search_end = btrfs_super_total_blocks(info->disk_super);
959         if (hint_block) {
960                 block_group = btrfs_lookup_block_group(info, hint_block);
961                 block_group = btrfs_find_block_group(root, block_group,
962                                                      hint_block, data, 1);
963         } else {
964                 block_group = btrfs_find_block_group(root,
965                                                      trans->block_group, 0,
966                                                      data, 1);
967         }
968
969 check_failed:
970         if (!block_group->data)
971                 search_start = find_search_start(root, &block_group,
972                                                  search_start, total_needed);
973         else if (!full_scan)
974                 search_start = max(block_group->last_alloc, search_start);
975
976         btrfs_init_path(path);
977         ins->objectid = search_start;
978         ins->offset = 0;
979         start_found = 0;
980
981         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
982         if (ret < 0)
983                 goto error;
984
985         if (path->slots[0] > 0) {
986                 path->slots[0]--;
987         }
988
989         l = btrfs_buffer_leaf(path->nodes[0]);
990         btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
991         /*
992          * a rare case, go back one key if we hit a block group item
993          * instead of an extent item
994          */
995         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
996             key.objectid + key.offset >= search_start) {
997                 ins->objectid = key.objectid;
998                 ins->offset = key.offset - 1;
999                 btrfs_release_path(root, path);
1000                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1001                 if (ret < 0)
1002                         goto error;
1003
1004                 if (path->slots[0] > 0) {
1005                         path->slots[0]--;
1006                 }
1007         }
1008
1009         while (1) {
1010                 l = btrfs_buffer_leaf(path->nodes[0]);
1011                 slot = path->slots[0];
1012                 if (slot >= btrfs_header_nritems(&l->header)) {
1013                         if (fill_prealloc) {
1014                                 info->extent_tree_prealloc_nr = 0;
1015                                 total_found = 0;
1016                         }
1017                         if (start_found)
1018                                 limit = last_block +
1019                                         (block_group->key.offset >> 1);
1020                         else
1021                                 limit = search_start +
1022                                         (block_group->key.offset >> 1);
1023                         ret = btrfs_next_leaf(root, path);
1024                         if (ret == 0)
1025                                 continue;
1026                         if (ret < 0)
1027                                 goto error;
1028                         if (!start_found) {
1029                                 ins->objectid = search_start;
1030                                 ins->offset = search_end - search_start;
1031                                 start_found = 1;
1032                                 goto check_pending;
1033                         }
1034                         ins->objectid = last_block > search_start ?
1035                                         last_block : search_start;
1036                         ins->offset = search_end - ins->objectid;
1037                         goto check_pending;
1038                 }
1039
1040                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1041                 if (key.objectid >= search_start && key.objectid > last_block &&
1042                     start_found) {
1043                         if (last_block < search_start)
1044                                 last_block = search_start;
1045                         hole_size = key.objectid - last_block;
1046                         if (hole_size >= num_blocks) {
1047                                 ins->objectid = last_block;
1048                                 ins->offset = hole_size;
1049                                 goto check_pending;
1050                         }
1051                 }
1052
1053                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
1054                         goto next;
1055
1056                 start_found = 1;
1057                 last_block = key.objectid + key.offset;
1058                 if (!full_scan && last_block >= block_group->key.objectid +
1059                     block_group->key.offset) {
1060                         btrfs_release_path(root, path);
1061                         search_start = block_group->key.objectid +
1062                                 block_group->key.offset * 2;
1063                         goto new_group;
1064                 }
1065 next:
1066                 path->slots[0]++;
1067                 cond_resched();
1068         }
1069         // FIXME -ENOSPC
1070 check_pending:
1071         /* we have to make sure we didn't find an extent that has already
1072          * been allocated by the map tree or the original allocation
1073          */
1074         btrfs_release_path(root, path);
1075         BUG_ON(ins->objectid < search_start);
1076
1077         if (ins->objectid + num_blocks >= search_end) {
1078                 if (full_scan) {
1079                         ret = -ENOSPC;
1080                         goto error;
1081                 }
1082                 search_start = orig_search_start;
1083                 if (wrapped)
1084                         full_scan = 1;
1085                 else
1086                         wrapped = 1;
1087                 goto new_group;
1088         }
1089         for (test_block = ins->objectid;
1090              test_block < ins->objectid + num_blocks; test_block++) {
1091                 if (test_radix_bit(&info->pinned_radix, test_block)) {
1092                         search_start = test_block + 1;
1093                         goto new_group;
1094                 }
1095         }
1096         if (!fill_prealloc && info->extent_tree_insert_nr) {
1097                 u64 last =
1098                   info->extent_tree_insert[info->extent_tree_insert_nr - 1];
1099                 if (ins->objectid + num_blocks >
1100                     info->extent_tree_insert[0] &&
1101                     ins->objectid <= last) {
1102                         search_start = last + 1;
1103                         WARN_ON(!full_scan);
1104                         goto new_group;
1105                 }
1106         }
1107         if (!fill_prealloc && info->extent_tree_prealloc_nr) {
1108                 u64 first =
1109                   info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
1110                 if (ins->objectid + num_blocks > first &&
1111                     ins->objectid <= info->extent_tree_prealloc[0]) {
1112                         search_start = info->extent_tree_prealloc[0] + 1;
1113                         WARN_ON(!full_scan);
1114                         goto new_group;
1115                 }
1116         }
1117         if (fill_prealloc) {
1118                 int nr;
1119                 test_block = ins->objectid;
1120                 if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
1121                     leaf_range(root)) {
1122                         total_found = 0;
1123                         info->extent_tree_prealloc_nr = total_found;
1124                 }
1125                 while(test_block < ins->objectid + ins->offset &&
1126                       total_found < total_needed) {
1127                         nr = total_needed - total_found - 1;
1128                         BUG_ON(nr < 0);
1129                         info->extent_tree_prealloc[nr] = test_block;
1130                         total_found++;
1131                         test_block++;
1132                 }
1133                 if (total_found < total_needed) {
1134                         search_start = test_block;
1135                         goto new_group;
1136                 }
1137                 info->extent_tree_prealloc_nr = total_found;
1138         }
1139         if (!data) {
1140                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1141                 if (block_group) {
1142                         if (fill_prealloc)
1143                                 block_group->last_prealloc =
1144                                      info->extent_tree_prealloc[total_needed-1];
1145                         else
1146                                 trans->block_group = block_group;
1147                 }
1148         }
1149         ins->offset = num_blocks;
1150         btrfs_free_path(path);
1151         return 0;
1152
1153 new_group:
1154         if (search_start + num_blocks >= search_end) {
1155                 search_start = orig_search_start;
1156                 if (full_scan) {
1157                         ret = -ENOSPC;
1158                         goto error;
1159                 }
1160                 if (wrapped)
1161                         full_scan = 1;
1162                 else
1163                         wrapped = 1;
1164         }
1165         block_group = btrfs_lookup_block_group(info, search_start);
1166         cond_resched();
1167         if (!full_scan)
1168                 block_group = btrfs_find_block_group(root, block_group,
1169                                                      search_start, data, 0);
1170         goto check_failed;
1171
1172 error:
1173         btrfs_release_path(root, path);
1174         btrfs_free_path(path);
1175         return ret;
1176 }
1177 /*
1178  * finds a free extent and does all the dirty work required for allocation
1179  * returns the key for the extent through ins, and a tree buffer for
1180  * the first block of the extent through buf.
1181  *
1182  * returns 0 if everything worked, non-zero otherwise.
1183  */
1184 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1185                        struct btrfs_root *root, u64 owner,
1186                        u64 num_blocks, u64 hint_block,
1187                        u64 search_end, struct btrfs_key *ins, int data)
1188 {
1189         int ret;
1190         int pending_ret;
1191         u64 super_blocks_used;
1192         u64 search_start = 0;
1193         struct btrfs_fs_info *info = root->fs_info;
1194         struct btrfs_root *extent_root = info->extent_root;
1195         struct btrfs_extent_item extent_item;
1196         struct btrfs_key prealloc_key;
1197
1198         btrfs_set_extent_refs(&extent_item, 1);
1199         btrfs_set_extent_owner(&extent_item, owner);
1200
1201         if (root == extent_root) {
1202                 int nr;
1203                 BUG_ON(info->extent_tree_prealloc_nr == 0);
1204                 BUG_ON(num_blocks != 1);
1205                 ins->offset = 1;
1206                 info->extent_tree_prealloc_nr--;
1207                 nr = info->extent_tree_prealloc_nr;
1208                 ins->objectid = info->extent_tree_prealloc[nr];
1209                 info->extent_tree_insert[info->extent_tree_insert_nr++] =
1210                         ins->objectid;
1211                 ret = update_block_group(trans, root,
1212                                          ins->objectid, ins->offset, 1, 0, 0);
1213                 BUG_ON(ret);
1214                 return 0;
1215         }
1216
1217         /*
1218          * if we're doing a data allocation, preallocate room in the
1219          * extent tree first.  This way the extent tree blocks end up
1220          * in the correct block group.
1221          */
1222         if (data) {
1223                 ret = find_free_extent(trans, root, 0, 0,
1224                                        search_end, 0, &prealloc_key, 0);
1225                 if (ret) {
1226                         return ret;
1227                 }
1228                 if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
1229                         int nr = info->extent_tree_prealloc_nr;
1230                         search_end = info->extent_tree_prealloc[nr - 1] - 1;
1231                 } else {
1232                         search_start = info->extent_tree_prealloc[0] + 1;
1233                 }
1234         }
1235         if (hint_block < search_start)
1236                 hint_block = search_start;
1237         /* do the real allocation */
1238         ret = find_free_extent(trans, root, num_blocks, search_start,
1239                                search_end, hint_block, ins, data);
1240         if (ret) {
1241                 return ret;
1242         }
1243
1244         /*
1245          * if we're doing a metadata allocation, preallocate space in the
1246          * extent tree second.  This way, we don't create a tiny hole
1247          * in the allocation map between any unused preallocation blocks
1248          * and the metadata block we're actually allocating.  On disk,
1249          * it'll go:
1250          * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1251          * The unused prealloc will get reused the next time around.
1252          */
1253         if (!data) {
1254                 if (ins->objectid + ins->offset >= search_end)
1255                         search_end = ins->objectid - 1;
1256                 else
1257                         search_start = ins->objectid + ins->offset;
1258
1259                 if (hint_block < search_start)
1260                         hint_block = search_start;
1261
1262                 ret = find_free_extent(trans, root, 0, search_start,
1263                                        search_end, hint_block,
1264                                        &prealloc_key, 0);
1265                 if (ret) {
1266                         return ret;
1267                 }
1268         }
1269
1270         super_blocks_used = btrfs_super_blocks_used(info->disk_super);
1271         btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
1272                                     num_blocks);
1273         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1274                                 sizeof(extent_item));
1275
1276         finish_current_insert(trans, extent_root);
1277         pending_ret = del_pending_extents(trans, extent_root);
1278         if (ret) {
1279                 return ret;
1280         }
1281         if (pending_ret) {
1282                 return pending_ret;
1283         }
1284         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1285                                  data);
1286         BUG_ON(ret);
1287         return 0;
1288 }
1289
1290 /*
1291  * helper function to allocate a block for a given tree
1292  * returns the tree buffer or NULL.
1293  */
1294 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1295                                            struct btrfs_root *root, u64 hint)
1296 {
1297         struct btrfs_key ins;
1298         int ret;
1299         struct buffer_head *buf;
1300
1301         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1302                                  1, hint, (unsigned long)-1, &ins, 0);
1303         if (ret) {
1304                 BUG();
1305                 return NULL;
1306         }
1307         BUG_ON(ret);
1308         buf = btrfs_find_create_tree_block(root, ins.objectid);
1309         set_buffer_uptodate(buf);
1310         set_buffer_checked(buf);
1311         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1312         return buf;
1313 }
1314
1315 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1316                          struct btrfs_root *root, struct buffer_head *cur)
1317 {
1318         struct btrfs_disk_key *key;
1319         struct btrfs_leaf *leaf;
1320         struct btrfs_file_extent_item *fi;
1321         int i;
1322         int nritems;
1323         int ret;
1324
1325         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1326         leaf = btrfs_buffer_leaf(cur);
1327         nritems = btrfs_header_nritems(&leaf->header);
1328         for (i = 0; i < nritems; i++) {
1329                 u64 disk_blocknr;
1330                 key = &leaf->items[i].key;
1331                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1332                         continue;
1333                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1334                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1335                         continue;
1336                 /*
1337                  * FIXME make sure to insert a trans record that
1338                  * repeats the snapshot del on crash
1339                  */
1340                 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
1341                 if (disk_blocknr == 0)
1342                         continue;
1343                 ret = btrfs_free_extent(trans, root, disk_blocknr,
1344                                         btrfs_file_extent_disk_num_blocks(fi),
1345                                         0);
1346                 BUG_ON(ret);
1347         }
1348         return 0;
1349 }
1350
1351 /*
1352  * helper function for drop_snapshot, this walks down the tree dropping ref
1353  * counts as it goes.
1354  */
1355 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1356                           *root, struct btrfs_path *path, int *level)
1357 {
1358         struct buffer_head *next;
1359         struct buffer_head *cur;
1360         u64 blocknr;
1361         int ret;
1362         u32 refs;
1363
1364         WARN_ON(*level < 0);
1365         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1366         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1367                                1, &refs);
1368         BUG_ON(ret);
1369         if (refs > 1)
1370                 goto out;
1371         /*
1372          * walk down to the last node level and free all the leaves
1373          */
1374         while(*level >= 0) {
1375                 WARN_ON(*level < 0);
1376                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1377                 cur = path->nodes[*level];
1378                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1379                         WARN_ON(1);
1380                 if (path->slots[*level] >=
1381                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1382                         break;
1383                 if (*level == 0) {
1384                         ret = drop_leaf_ref(trans, root, cur);
1385                         BUG_ON(ret);
1386                         break;
1387                 }
1388                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1389                                               path->slots[*level]);
1390                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1391                 BUG_ON(ret);
1392                 if (refs != 1) {
1393                         path->slots[*level]++;
1394                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1395                         BUG_ON(ret);
1396                         continue;
1397                 }
1398                 next = read_tree_block(root, blocknr);
1399                 WARN_ON(*level <= 0);
1400                 if (path->nodes[*level-1])
1401                         btrfs_block_release(root, path->nodes[*level-1]);
1402                 path->nodes[*level-1] = next;
1403                 *level = btrfs_header_level(btrfs_buffer_header(next));
1404                 path->slots[*level] = 0;
1405         }
1406 out:
1407         WARN_ON(*level < 0);
1408         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1409         ret = btrfs_free_extent(trans, root,
1410                                 bh_blocknr(path->nodes[*level]), 1, 1);
1411         btrfs_block_release(root, path->nodes[*level]);
1412         path->nodes[*level] = NULL;
1413         *level += 1;
1414         BUG_ON(ret);
1415         return 0;
1416 }
1417
1418 /*
1419  * helper for dropping snapshots.  This walks back up the tree in the path
1420  * to find the first node higher up where we haven't yet gone through
1421  * all the slots
1422  */
1423 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1424                         *root, struct btrfs_path *path, int *level)
1425 {
1426         int i;
1427         int slot;
1428         int ret;
1429         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1430                 slot = path->slots[i];
1431                 if (slot < btrfs_header_nritems(
1432                     btrfs_buffer_header(path->nodes[i])) - 1) {
1433                         path->slots[i]++;
1434                         *level = i;
1435                         return 0;
1436                 } else {
1437                         ret = btrfs_free_extent(trans, root,
1438                                                 bh_blocknr(path->nodes[*level]),
1439                                                 1, 1);
1440                         BUG_ON(ret);
1441                         btrfs_block_release(root, path->nodes[*level]);
1442                         path->nodes[*level] = NULL;
1443                         *level = i + 1;
1444                 }
1445         }
1446         return 1;
1447 }
1448
1449 /*
1450  * drop the reference count on the tree rooted at 'snap'.  This traverses
1451  * the tree freeing any blocks that have a ref count of zero after being
1452  * decremented.
1453  */
1454 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1455                         *root, struct buffer_head *snap)
1456 {
1457         int ret = 0;
1458         int wret;
1459         int level;
1460         struct btrfs_path *path;
1461         int i;
1462         int orig_level;
1463
1464         path = btrfs_alloc_path();
1465         BUG_ON(!path);
1466         btrfs_init_path(path);
1467
1468         level = btrfs_header_level(btrfs_buffer_header(snap));
1469         orig_level = level;
1470         path->nodes[level] = snap;
1471         path->slots[level] = 0;
1472         while(1) {
1473                 wret = walk_down_tree(trans, root, path, &level);
1474                 if (wret > 0)
1475                         break;
1476                 if (wret < 0)
1477                         ret = wret;
1478
1479                 wret = walk_up_tree(trans, root, path, &level);
1480                 if (wret > 0)
1481                         break;
1482                 if (wret < 0)
1483                         ret = wret;
1484                 btrfs_btree_balance_dirty(root);
1485         }
1486         for (i = 0; i <= orig_level; i++) {
1487                 if (path->nodes[i]) {
1488                         btrfs_block_release(root, path->nodes[i]);
1489                 }
1490         }
1491         btrfs_free_path(path);
1492         return ret;
1493 }
1494
1495 static int free_block_group_radix(struct radix_tree_root *radix)
1496 {
1497         int ret;
1498         struct btrfs_block_group_cache *cache[8];
1499         int i;
1500
1501         while(1) {
1502                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1503                                              ARRAY_SIZE(cache));
1504                 if (!ret)
1505                         break;
1506                 for (i = 0; i < ret; i++) {
1507                         radix_tree_delete(radix, cache[i]->key.objectid +
1508                                           cache[i]->key.offset - 1);
1509                         kfree(cache[i]);
1510                 }
1511         }
1512         return 0;
1513 }
1514
1515 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1516 {
1517         int ret;
1518         int ret2;
1519         unsigned long gang[16];
1520         int i;
1521
1522         ret = free_block_group_radix(&info->block_group_radix);
1523         ret2 = free_block_group_radix(&info->block_group_data_radix);
1524         if (ret)
1525                 return ret;
1526         if (ret2)
1527                 return ret2;
1528
1529         while(1) {
1530                 ret = find_first_radix_bit(&info->extent_map_radix,
1531                                            gang, 0, ARRAY_SIZE(gang));
1532                 if (!ret)
1533                         break;
1534                 for (i = 0; i < ret; i++) {
1535                         clear_radix_bit(&info->extent_map_radix, gang[i]);
1536                 }
1537         }
1538         return 0;
1539 }
1540
1541 int btrfs_read_block_groups(struct btrfs_root *root)
1542 {
1543         struct btrfs_path *path;
1544         int ret;
1545         int err = 0;
1546         struct btrfs_block_group_item *bi;
1547         struct btrfs_block_group_cache *cache;
1548         struct btrfs_fs_info *info = root->fs_info;
1549         struct radix_tree_root *radix;
1550         struct btrfs_key key;
1551         struct btrfs_key found_key;
1552         struct btrfs_leaf *leaf;
1553         u64 group_size_blocks;
1554         u64 used;
1555
1556         group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
1557                 root->fs_info->sb->s_blocksize_bits;
1558         root = info->extent_root;
1559         key.objectid = 0;
1560         key.offset = group_size_blocks;
1561         key.flags = 0;
1562         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1563
1564         path = btrfs_alloc_path();
1565         if (!path)
1566                 return -ENOMEM;
1567
1568         while(1) {
1569                 ret = btrfs_search_slot(NULL, info->extent_root,
1570                                         &key, path, 0, 0);
1571                 if (ret != 0) {
1572                         err = ret;
1573                         break;
1574                 }
1575                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1576                 btrfs_disk_key_to_cpu(&found_key,
1577                                       &leaf->items[path->slots[0]].key);
1578                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1579                 if (!cache) {
1580                         err = -1;
1581                         break;
1582                 }
1583
1584                 bi = btrfs_item_ptr(leaf, path->slots[0],
1585                                     struct btrfs_block_group_item);
1586                 if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
1587                         radix = &info->block_group_data_radix;
1588                         cache->data = 1;
1589                 } else {
1590                         radix = &info->block_group_radix;
1591                         cache->data = 0;
1592                 }
1593
1594                 memcpy(&cache->item, bi, sizeof(*bi));
1595                 memcpy(&cache->key, &found_key, sizeof(found_key));
1596                 cache->last_alloc = cache->key.objectid;
1597                 cache->first_free = cache->key.objectid;
1598                 cache->last_prealloc = cache->key.objectid;
1599                 cache->pinned = 0;
1600                 cache->cached = 0;
1601
1602                 cache->radix = radix;
1603
1604                 key.objectid = found_key.objectid + found_key.offset;
1605                 btrfs_release_path(root, path);
1606                 ret = radix_tree_insert(radix, found_key.objectid +
1607                                         found_key.offset - 1,
1608                                         (void *)cache);
1609                 BUG_ON(ret);
1610                 used = btrfs_block_group_used(bi);
1611                 if (used < div_factor(key.offset, 8)) {
1612                         radix_tree_tag_set(radix, found_key.objectid +
1613                                            found_key.offset - 1,
1614                                            BTRFS_BLOCK_GROUP_AVAIL);
1615                 }
1616                 if (key.objectid >=
1617                     btrfs_super_total_blocks(info->disk_super))
1618                         break;
1619         }
1620
1621         btrfs_free_path(path);
1622         return 0;
1623 }