Btrfs: factor page private preparations into a helper
[safe/jmp/linux-2.6] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
/*
 * Forward declarations for helpers that are called before their definitions.
 * finish_current_insert() is defined further down in this file;
 * del_pending_extents() is presumably defined there as well.
 */
static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root);
29
30 static int cache_block_group(struct btrfs_root *root,
31                              struct btrfs_block_group_cache *block_group)
32 {
33         struct btrfs_path *path;
34         int ret;
35         struct btrfs_key key;
36         struct btrfs_leaf *leaf;
37         struct radix_tree_root *extent_radix;
38         int slot;
39         u64 i;
40         u64 last = 0;
41         u64 hole_size;
42         u64 first_free;
43         int found = 0;
44
45         root = root->fs_info->extent_root;
46         extent_radix = &root->fs_info->extent_map_radix;
47
48         if (block_group->cached)
49                 return 0;
50         if (block_group->data)
51                 return 0;
52         path = btrfs_alloc_path();
53         if (!path)
54                 return -ENOMEM;
55
56         path->reada = 2;
57         first_free = block_group->key.objectid;
58         key.objectid = block_group->key.objectid;
59         key.flags = 0;
60         key.offset = 0;
61
62         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
63         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
64
65         if (ret < 0)
66                 return ret;
67
68         if (ret && path->slots[0] > 0)
69                 path->slots[0]--;
70
71         while(1) {
72                 leaf = btrfs_buffer_leaf(path->nodes[0]);
73                 slot = path->slots[0];
74                 if (slot >= btrfs_header_nritems(&leaf->header)) {
75                         ret = btrfs_next_leaf(root, path);
76                         if (ret < 0)
77                                 goto err;
78                         if (ret == 0) {
79                                 continue;
80                         } else {
81                                 break;
82                         }
83                 }
84
85                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
86                 if (key.objectid < block_group->key.objectid) {
87                         if (key.objectid + key.offset > first_free)
88                                 first_free = key.objectid + key.offset;
89                         goto next;
90                 }
91
92                 if (key.objectid >= block_group->key.objectid +
93                     block_group->key.offset) {
94                         break;
95                 }
96
97                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
98                         if (!found) {
99                                 last = first_free;
100                                 found = 1;
101                         }
102                         hole_size = key.objectid - last;
103                         for (i = 0; i < hole_size; i++) {
104                                 set_radix_bit(extent_radix, last + i);
105                         }
106                         last = key.objectid + key.offset;
107                 }
108 next:
109                 path->slots[0]++;
110         }
111
112         if (!found)
113                 last = first_free;
114         if (block_group->key.objectid +
115             block_group->key.offset > last) {
116                 hole_size = block_group->key.objectid +
117                         block_group->key.offset - last;
118                 for (i = 0; i < hole_size; i++) {
119                         set_radix_bit(extent_radix,
120                                         last + i);
121                 }
122         }
123         block_group->cached = 1;
124 err:
125         btrfs_free_path(path);
126         return 0;
127 }
128
129 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
130                                                          btrfs_fs_info *info,
131                                                          u64 blocknr)
132 {
133         struct btrfs_block_group_cache *block_group;
134         int ret;
135
136         ret = radix_tree_gang_lookup(&info->block_group_radix,
137                                      (void **)&block_group,
138                                      blocknr, 1);
139         if (ret) {
140                 if (block_group->key.objectid <= blocknr && blocknr <=
141                     block_group->key.objectid + block_group->key.offset)
142                         return block_group;
143         }
144         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
145                                      (void **)&block_group,
146                                      blocknr, 1);
147         if (ret) {
148                 if (block_group->key.objectid <= blocknr && blocknr <=
149                     block_group->key.objectid + block_group->key.offset)
150                         return block_group;
151         }
152         return NULL;
153 }
154
155 static u64 leaf_range(struct btrfs_root *root)
156 {
157         u64 size = BTRFS_LEAF_DATA_SIZE(root);
158         do_div(size, sizeof(struct btrfs_extent_item) +
159                 sizeof(struct btrfs_item));
160         return size;
161 }
162
163 static u64 find_search_start(struct btrfs_root *root,
164                              struct btrfs_block_group_cache **cache_ret,
165                              u64 search_start, int num)
166 {
167         unsigned long gang[8];
168         int ret;
169         struct btrfs_block_group_cache *cache = *cache_ret;
170         u64 last = max(search_start, cache->key.objectid);
171
172         if (cache->data)
173                 goto out;
174 again:
175         ret = cache_block_group(root, cache);
176         if (ret)
177                 goto out;
178         while(1) {
179                 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
180                                            gang, last, ARRAY_SIZE(gang));
181                 if (!ret)
182                         goto out;
183                 last = gang[ret-1] + 1;
184                 if (num > 1) {
185                         if (ret != ARRAY_SIZE(gang)) {
186                                 goto new_group;
187                         }
188                         if (gang[ret-1] - gang[0] > leaf_range(root)) {
189                                 continue;
190                         }
191                 }
192                 if (gang[0] >= cache->key.objectid + cache->key.offset) {
193                         goto new_group;
194                 }
195                 return gang[0];
196         }
197 out:
198         return max(cache->last_alloc, search_start);
199
200 new_group:
201         cache = btrfs_lookup_block_group(root->fs_info,
202                                          last + cache->key.offset - 1);
203         if (!cache) {
204                 return max((*cache_ret)->last_alloc, search_start);
205         }
206         cache = btrfs_find_block_group(root, cache,
207                                        last + cache->key.offset - 1, 0, 0);
208         *cache_ret = cache;
209         goto again;
210 }
211
212 static u64 div_factor(u64 num, int factor)
213 {
214         num *= factor;
215         do_div(num, 10);
216         return num;
217 }
218
219 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
220                                                  struct btrfs_block_group_cache
221                                                  *hint, u64 search_start,
222                                                  int data, int owner)
223 {
224         struct btrfs_block_group_cache *cache[8];
225         struct btrfs_block_group_cache *found_group = NULL;
226         struct btrfs_fs_info *info = root->fs_info;
227         struct radix_tree_root *radix;
228         struct radix_tree_root *swap_radix;
229         u64 used;
230         u64 last = 0;
231         u64 hint_last;
232         int i;
233         int ret;
234         int full_search = 0;
235         int factor = 8;
236         int data_swap = 0;
237
238         if (!owner)
239                 factor = 5;
240
241         if (data) {
242                 radix = &info->block_group_data_radix;
243                 swap_radix = &info->block_group_radix;
244         } else {
245                 radix = &info->block_group_radix;
246                 swap_radix = &info->block_group_data_radix;
247         }
248
249         if (search_start) {
250                 struct btrfs_block_group_cache *shint;
251                 shint = btrfs_lookup_block_group(info, search_start);
252                 if (shint && shint->data == data) {
253                         used = btrfs_block_group_used(&shint->item);
254                         if (used + shint->pinned <
255                             div_factor(shint->key.offset, factor)) {
256                                 return shint;
257                         }
258                 }
259         }
260         if (hint && hint->data == data) {
261                 used = btrfs_block_group_used(&hint->item);
262                 if (used + hint->pinned <
263                     div_factor(hint->key.offset, factor)) {
264                         return hint;
265                 }
266                 if (used >= div_factor(hint->key.offset, 8)) {
267                         radix_tree_tag_clear(radix,
268                                              hint->key.objectid +
269                                              hint->key.offset - 1,
270                                              BTRFS_BLOCK_GROUP_AVAIL);
271                 }
272                 last = hint->key.offset * 3;
273                 if (hint->key.objectid >= last)
274                         last = max(search_start + hint->key.offset - 1,
275                                    hint->key.objectid - last);
276                 else
277                         last = hint->key.objectid + hint->key.offset;
278                 hint_last = last;
279         } else {
280                 if (hint)
281                         hint_last = max(hint->key.objectid, search_start);
282                 else
283                         hint_last = search_start;
284
285                 last = hint_last;
286         }
287         while(1) {
288                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
289                                                  last, ARRAY_SIZE(cache),
290                                                  BTRFS_BLOCK_GROUP_AVAIL);
291                 if (!ret)
292                         break;
293                 for (i = 0; i < ret; i++) {
294                         last = cache[i]->key.objectid +
295                                 cache[i]->key.offset;
296                         used = btrfs_block_group_used(&cache[i]->item);
297                         if (used + cache[i]->pinned <
298                             div_factor(cache[i]->key.offset, factor)) {
299                                 found_group = cache[i];
300                                 goto found;
301                         }
302                         if (used >= div_factor(cache[i]->key.offset, 8)) {
303                                 radix_tree_tag_clear(radix,
304                                                      cache[i]->key.objectid +
305                                                      cache[i]->key.offset - 1,
306                                                      BTRFS_BLOCK_GROUP_AVAIL);
307                         }
308                 }
309                 cond_resched();
310         }
311         last = hint_last;
312 again:
313         while(1) {
314                 ret = radix_tree_gang_lookup(radix, (void **)cache,
315                                              last, ARRAY_SIZE(cache));
316                 if (!ret)
317                         break;
318                 for (i = 0; i < ret; i++) {
319                         last = cache[i]->key.objectid +
320                                 cache[i]->key.offset;
321                         used = btrfs_block_group_used(&cache[i]->item);
322                         if (used + cache[i]->pinned < cache[i]->key.offset) {
323                                 found_group = cache[i];
324                                 goto found;
325                         }
326                         if (used >= cache[i]->key.offset) {
327                                 radix_tree_tag_clear(radix,
328                                                      cache[i]->key.objectid +
329                                                      cache[i]->key.offset - 1,
330                                                      BTRFS_BLOCK_GROUP_AVAIL);
331                         }
332                 }
333                 cond_resched();
334         }
335         if (!full_search) {
336                 last = search_start;
337                 full_search = 1;
338                 goto again;
339         }
340         if (!data_swap) {
341                 struct radix_tree_root *tmp = radix;
342                 data_swap = 1;
343                 radix = swap_radix;
344                 swap_radix = tmp;
345                 last = search_start;
346                 goto again;
347         }
348         if (!found_group) {
349                 ret = radix_tree_gang_lookup(radix,
350                                              (void **)&found_group, 0, 1);
351                 if (ret == 0) {
352                         ret = radix_tree_gang_lookup(swap_radix,
353                                                      (void **)&found_group,
354                                                      0, 1);
355                 }
356                 BUG_ON(ret != 1);
357         }
358 found:
359         return found_group;
360 }
361
362 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
363                                 struct btrfs_root *root,
364                                 u64 blocknr, u64 num_blocks)
365 {
366         struct btrfs_path *path;
367         int ret;
368         struct btrfs_key key;
369         struct btrfs_leaf *l;
370         struct btrfs_extent_item *item;
371         u32 refs;
372
373         path = btrfs_alloc_path();
374         if (!path)
375                 return -ENOMEM;
376
377         key.objectid = blocknr;
378         key.flags = 0;
379         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
380         key.offset = num_blocks;
381         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
382                                 0, 1);
383         if (ret < 0)
384                 return ret;
385         if (ret != 0) {
386                 BUG();
387         }
388         BUG_ON(ret != 0);
389         l = btrfs_buffer_leaf(path->nodes[0]);
390         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
391         refs = btrfs_extent_refs(item);
392         btrfs_set_extent_refs(item, refs + 1);
393         btrfs_mark_buffer_dirty(path->nodes[0]);
394
395         btrfs_release_path(root->fs_info->extent_root, path);
396         btrfs_free_path(path);
397         finish_current_insert(trans, root->fs_info->extent_root);
398         del_pending_extents(trans, root->fs_info->extent_root);
399         return 0;
400 }
401
402 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
403                          struct btrfs_root *root)
404 {
405         finish_current_insert(trans, root->fs_info->extent_root);
406         del_pending_extents(trans, root->fs_info->extent_root);
407         return 0;
408 }
409
410 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
411                              struct btrfs_root *root, u64 blocknr,
412                              u64 num_blocks, u32 *refs)
413 {
414         struct btrfs_path *path;
415         int ret;
416         struct btrfs_key key;
417         struct btrfs_leaf *l;
418         struct btrfs_extent_item *item;
419
420         path = btrfs_alloc_path();
421         key.objectid = blocknr;
422         key.offset = num_blocks;
423         key.flags = 0;
424         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
425         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
426                                 0, 0);
427         if (ret < 0)
428                 goto out;
429         if (ret != 0)
430                 BUG();
431         l = btrfs_buffer_leaf(path->nodes[0]);
432         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
433         *refs = btrfs_extent_refs(item);
434 out:
435         btrfs_free_path(path);
436         return 0;
437 }
438
439 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
440                        struct btrfs_root *root)
441 {
442         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
443 }
444
445 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
446                   struct buffer_head *buf)
447 {
448         u64 blocknr;
449         struct btrfs_node *buf_node;
450         struct btrfs_leaf *buf_leaf;
451         struct btrfs_disk_key *key;
452         struct btrfs_file_extent_item *fi;
453         int i;
454         int leaf;
455         int ret;
456         int faili;
457         int err;
458
459         if (!root->ref_cows)
460                 return 0;
461         buf_node = btrfs_buffer_node(buf);
462         leaf = btrfs_is_leaf(buf_node);
463         buf_leaf = btrfs_buffer_leaf(buf);
464         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
465                 if (leaf) {
466                         u64 disk_blocknr;
467                         key = &buf_leaf->items[i].key;
468                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
469                                 continue;
470                         fi = btrfs_item_ptr(buf_leaf, i,
471                                             struct btrfs_file_extent_item);
472                         if (btrfs_file_extent_type(fi) ==
473                             BTRFS_FILE_EXTENT_INLINE)
474                                 continue;
475                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
476                         if (disk_blocknr == 0)
477                                 continue;
478                         ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
479                                     btrfs_file_extent_disk_num_blocks(fi));
480                         if (ret) {
481                                 faili = i;
482                                 goto fail;
483                         }
484                 } else {
485                         blocknr = btrfs_node_blockptr(buf_node, i);
486                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
487                         if (ret) {
488                                 faili = i;
489                                 goto fail;
490                         }
491                 }
492         }
493         return 0;
494 fail:
495         WARN_ON(1);
496         for (i =0; i < faili; i++) {
497                 if (leaf) {
498                         u64 disk_blocknr;
499                         key = &buf_leaf->items[i].key;
500                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
501                                 continue;
502                         fi = btrfs_item_ptr(buf_leaf, i,
503                                             struct btrfs_file_extent_item);
504                         if (btrfs_file_extent_type(fi) ==
505                             BTRFS_FILE_EXTENT_INLINE)
506                                 continue;
507                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
508                         if (disk_blocknr == 0)
509                                 continue;
510                         err = btrfs_free_extent(trans, root, disk_blocknr,
511                                     btrfs_file_extent_disk_num_blocks(fi), 0);
512                         BUG_ON(err);
513                 } else {
514                         blocknr = btrfs_node_blockptr(buf_node, i);
515                         err = btrfs_free_extent(trans, root, blocknr, 1, 0);
516                         BUG_ON(err);
517                 }
518         }
519         return ret;
520 }
521
522 static int write_one_cache_group(struct btrfs_trans_handle *trans,
523                                  struct btrfs_root *root,
524                                  struct btrfs_path *path,
525                                  struct btrfs_block_group_cache *cache)
526 {
527         int ret;
528         int pending_ret;
529         struct btrfs_root *extent_root = root->fs_info->extent_root;
530         struct btrfs_block_group_item *bi;
531
532         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
533         if (ret < 0)
534                 goto fail;
535         BUG_ON(ret);
536         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
537                             struct btrfs_block_group_item);
538         memcpy(bi, &cache->item, sizeof(*bi));
539         btrfs_mark_buffer_dirty(path->nodes[0]);
540         btrfs_release_path(extent_root, path);
541 fail:
542         finish_current_insert(trans, extent_root);
543         pending_ret = del_pending_extents(trans, extent_root);
544         if (ret)
545                 return ret;
546         if (pending_ret)
547                 return pending_ret;
548         if (cache->data)
549                 cache->last_alloc = cache->first_free;
550         return 0;
551
552 }
553
554 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
555                                    struct btrfs_root *root,
556                                    struct radix_tree_root *radix)
557 {
558         struct btrfs_block_group_cache *cache[8];
559         int ret;
560         int err = 0;
561         int werr = 0;
562         int i;
563         struct btrfs_path *path;
564         unsigned long off = 0;
565
566         path = btrfs_alloc_path();
567         if (!path)
568                 return -ENOMEM;
569
570         while(1) {
571                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
572                                                  off, ARRAY_SIZE(cache),
573                                                  BTRFS_BLOCK_GROUP_DIRTY);
574                 if (!ret)
575                         break;
576                 for (i = 0; i < ret; i++) {
577                         err = write_one_cache_group(trans, root,
578                                                     path, cache[i]);
579                         /*
580                          * if we fail to write the cache group, we want
581                          * to keep it marked dirty in hopes that a later
582                          * write will work
583                          */
584                         if (err) {
585                                 werr = err;
586                                 off = cache[i]->key.objectid +
587                                         cache[i]->key.offset;
588                                 continue;
589                         }
590
591                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
592                                              cache[i]->key.offset - 1,
593                                              BTRFS_BLOCK_GROUP_DIRTY);
594                 }
595         }
596         btrfs_free_path(path);
597         return werr;
598 }
599
600 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
601                                    struct btrfs_root *root)
602 {
603         int ret;
604         int ret2;
605         ret = write_dirty_block_radix(trans, root,
606                                       &root->fs_info->block_group_radix);
607         ret2 = write_dirty_block_radix(trans, root,
608                                       &root->fs_info->block_group_data_radix);
609         if (ret)
610                 return ret;
611         if (ret2)
612                 return ret2;
613         return 0;
614 }
615
616 static int update_block_group(struct btrfs_trans_handle *trans,
617                               struct btrfs_root *root,
618                               u64 blocknr, u64 num, int alloc, int mark_free,
619                               int data)
620 {
621         struct btrfs_block_group_cache *cache;
622         struct btrfs_fs_info *info = root->fs_info;
623         u64 total = num;
624         u64 old_val;
625         u64 block_in_group;
626         u64 i;
627         int ret;
628
629         while(total) {
630                 cache = btrfs_lookup_block_group(info, blocknr);
631                 if (!cache) {
632                         return -1;
633                 }
634                 block_in_group = blocknr - cache->key.objectid;
635                 WARN_ON(block_in_group > cache->key.offset);
636                 radix_tree_tag_set(cache->radix, cache->key.objectid +
637                                    cache->key.offset - 1,
638                                    BTRFS_BLOCK_GROUP_DIRTY);
639
640                 old_val = btrfs_block_group_used(&cache->item);
641                 num = min(total, cache->key.offset - block_in_group);
642                 if (alloc) {
643                         if (blocknr > cache->last_alloc)
644                                 cache->last_alloc = blocknr;
645                         if (!cache->data) {
646                                 for (i = 0; i < num; i++) {
647                                         clear_radix_bit(&info->extent_map_radix,
648                                                         blocknr + i);
649                                 }
650                         }
651                         if (cache->data != data &&
652                             old_val < (cache->key.offset >> 1)) {
653                                 cache->data = data;
654                                 radix_tree_delete(cache->radix,
655                                                   cache->key.objectid +
656                                                   cache->key.offset - 1);
657
658                                 if (data) {
659                                         cache->radix =
660                                                 &info->block_group_data_radix;
661                                         cache->item.flags |=
662                                                 BTRFS_BLOCK_GROUP_DATA;
663                                 } else {
664                                         cache->radix = &info->block_group_radix;
665                                         cache->item.flags &=
666                                                 ~BTRFS_BLOCK_GROUP_DATA;
667                                 }
668                                 ret = radix_tree_insert(cache->radix,
669                                                         cache->key.objectid +
670                                                         cache->key.offset - 1,
671                                                         (void *)cache);
672                         }
673                         old_val += num;
674                 } else {
675                         old_val -= num;
676                         if (blocknr < cache->first_free)
677                                 cache->first_free = blocknr;
678                         if (!cache->data && mark_free) {
679                                 for (i = 0; i < num; i++) {
680                                         set_radix_bit(&info->extent_map_radix,
681                                                       blocknr + i);
682                                 }
683                         }
684                         if (old_val < (cache->key.offset >> 1) &&
685                             old_val + num >= (cache->key.offset >> 1)) {
686                                 radix_tree_tag_set(cache->radix,
687                                                    cache->key.objectid +
688                                                    cache->key.offset - 1,
689                                                    BTRFS_BLOCK_GROUP_AVAIL);
690                         }
691                 }
692                 btrfs_set_block_group_used(&cache->item, old_val);
693                 total -= num;
694                 blocknr += num;
695         }
696         return 0;
697 }
698
699 int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy)
700 {
701         unsigned long gang[8];
702         u64 last = 0;
703         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
704         int ret;
705         int i;
706
707         while(1) {
708                 ret = find_first_radix_bit(pinned_radix, gang, last,
709                                            ARRAY_SIZE(gang));
710                 if (!ret)
711                         break;
712                 for (i = 0 ; i < ret; i++) {
713                         set_radix_bit(copy, gang[i]);
714                         last = gang[i] + 1;
715                 }
716         }
717         ret = find_first_radix_bit(&root->fs_info->extent_ins_radix, gang, 0,
718                                    ARRAY_SIZE(gang));
719         WARN_ON(ret);
720         return 0;
721 }
722
723 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
724                                struct btrfs_root *root,
725                                struct radix_tree_root *unpin_radix)
726 {
727         unsigned long gang[8];
728         struct btrfs_block_group_cache *block_group;
729         u64 first = 0;
730         int ret;
731         int i;
732         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
733         struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
734
735         while(1) {
736                 ret = find_first_radix_bit(unpin_radix, gang, 0,
737                                            ARRAY_SIZE(gang));
738                 if (!ret)
739                         break;
740                 if (!first)
741                         first = gang[0];
742                 for (i = 0; i < ret; i++) {
743                         clear_radix_bit(pinned_radix, gang[i]);
744                         clear_radix_bit(unpin_radix, gang[i]);
745                         block_group = btrfs_lookup_block_group(root->fs_info,
746                                                                gang[i]);
747                         if (block_group) {
748                                 WARN_ON(block_group->pinned == 0);
749                                 block_group->pinned--;
750                                 if (gang[i] < block_group->last_alloc)
751                                         block_group->last_alloc = gang[i];
752                                 if (!block_group->data)
753                                         set_radix_bit(extent_radix, gang[i]);
754                         }
755                 }
756         }
757         return 0;
758 }
759
760 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
761                                  btrfs_root *extent_root)
762 {
763         struct btrfs_key ins;
764         struct btrfs_extent_item extent_item;
765         int i;
766         int ret;
767         int err;
768         unsigned long gang[8];
769         struct btrfs_fs_info *info = extent_root->fs_info;
770
771         btrfs_set_extent_refs(&extent_item, 1);
772         ins.offset = 1;
773         ins.flags = 0;
774         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
775         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
776
777         while(1) {
778                 ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0,
779                                            ARRAY_SIZE(gang));
780                 if (!ret)
781                         break;
782
783                 for (i = 0; i < ret; i++) {
784                         ins.objectid = gang[i];
785                         err = btrfs_insert_item(trans, extent_root, &ins,
786                                                 &extent_item,
787                                                 sizeof(extent_item));
788                         clear_radix_bit(&info->extent_ins_radix, gang[i]);
789                         WARN_ON(err);
790                 }
791         }
792         return 0;
793 }
794
795 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
796 {
797         int err;
798         struct btrfs_header *header;
799         struct buffer_head *bh;
800
801         if (!pending) {
802                 bh = btrfs_find_tree_block(root, blocknr);
803                 if (bh) {
804                         if (buffer_uptodate(bh)) {
805                                 u64 transid =
806                                     root->fs_info->running_transaction->transid;
807                                 header = btrfs_buffer_header(bh);
808                                 if (btrfs_header_generation(header) ==
809                                     transid) {
810                                         btrfs_block_release(root, bh);
811                                         return 0;
812                                 }
813                         }
814                         btrfs_block_release(root, bh);
815                 }
816                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
817                 if (!err) {
818                         struct btrfs_block_group_cache *cache;
819                         cache = btrfs_lookup_block_group(root->fs_info,
820                                                          blocknr);
821                         if (cache)
822                                 cache->pinned++;
823                 }
824         } else {
825                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
826         }
827         BUG_ON(err < 0);
828         return 0;
829 }
830
831 /*
832  * remove an extent from the root, returns 0 on success
833  */
834 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
835                          *root, u64 blocknr, u64 num_blocks, int pin,
836                          int mark_free)
837 {
838         struct btrfs_path *path;
839         struct btrfs_key key;
840         struct btrfs_fs_info *info = root->fs_info;
841         struct btrfs_root *extent_root = info->extent_root;
842         int ret;
843         struct btrfs_extent_item *ei;
844         u32 refs;
845
846         key.objectid = blocknr;
847         key.flags = 0;
848         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
849         key.offset = num_blocks;
850
851         path = btrfs_alloc_path();
852         if (!path)
853                 return -ENOMEM;
854
855         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
856         if (ret < 0)
857                 return ret;
858         BUG_ON(ret);
859         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
860                             struct btrfs_extent_item);
861         BUG_ON(ei->refs == 0);
862         refs = btrfs_extent_refs(ei) - 1;
863         btrfs_set_extent_refs(ei, refs);
864         btrfs_mark_buffer_dirty(path->nodes[0]);
865         if (refs == 0) {
866                 u64 super_blocks_used, root_blocks_used;
867
868                 if (pin) {
869                         ret = pin_down_block(root, blocknr, 0);
870                         BUG_ON(ret);
871                 }
872
873                 /* block accounting for super block */
874                 super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
875                 btrfs_set_super_blocks_used(&info->super_copy,
876                                             super_blocks_used - num_blocks);
877
878                 /* block accounting for root item */
879                 root_blocks_used = btrfs_root_blocks_used(&root->root_item);
880                 btrfs_set_root_blocks_used(&root->root_item,
881                                            root_blocks_used - num_blocks);
882
883                 ret = btrfs_del_item(trans, extent_root, path);
884                 if (ret) {
885                         return ret;
886                 }
887                 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
888                                          mark_free, 0);
889                 BUG_ON(ret);
890         }
891         btrfs_free_path(path);
892         finish_current_insert(trans, extent_root);
893         return ret;
894 }
895
896 /*
897  * find all the blocks marked as pending in the radix tree and remove
898  * them from the extent map
899  */
900 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
901                                btrfs_root *extent_root)
902 {
903         int ret;
904         int wret;
905         int err = 0;
906         unsigned long gang[4];
907         int i;
908         struct radix_tree_root *pending_radix;
909         struct radix_tree_root *pinned_radix;
910         struct btrfs_block_group_cache *cache;
911
912         pending_radix = &extent_root->fs_info->pending_del_radix;
913         pinned_radix = &extent_root->fs_info->pinned_radix;
914
915         while(1) {
916                 ret = find_first_radix_bit(pending_radix, gang, 0,
917                                            ARRAY_SIZE(gang));
918                 if (!ret)
919                         break;
920                 for (i = 0; i < ret; i++) {
921                         wret = set_radix_bit(pinned_radix, gang[i]);
922                         if (wret == 0) {
923                                 cache =
924                                   btrfs_lookup_block_group(extent_root->fs_info,
925                                                            gang[i]);
926                                 if (cache)
927                                         cache->pinned++;
928                         }
929                         if (wret < 0) {
930                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
931                                        wret);
932                                 BUG_ON(wret < 0);
933                         }
934                         wret = clear_radix_bit(pending_radix, gang[i]);
935                         BUG_ON(wret);
936                         wret = __free_extent(trans, extent_root,
937                                              gang[i], 1, 0, 0);
938                         if (wret)
939                                 err = wret;
940                 }
941         }
942         return err;
943 }
944
945 /*
946  * remove an extent from the root, returns 0 on success
947  */
948 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
949                       *root, u64 blocknr, u64 num_blocks, int pin)
950 {
951         struct btrfs_root *extent_root = root->fs_info->extent_root;
952         int pending_ret;
953         int ret;
954
955         if (root == extent_root) {
956                 pin_down_block(root, blocknr, 1);
957                 return 0;
958         }
959         ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
960         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
961         return ret ? ret : pending_ret;
962 }
963
964 /*
965  * walks the btree of allocated extents and find a hole of a given size.
966  * The key ins is changed to record the hole:
967  * ins->objectid == block start
968  * ins->flags = BTRFS_EXTENT_ITEM_KEY
969  * ins->offset == number of blocks
970  * Any available blocks before search_start are skipped.
971  */
972 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
973                             *orig_root, u64 num_blocks, u64 empty_size,
974                             u64 search_start, u64 search_end, u64 hint_block,
975                             struct btrfs_key *ins, u64 exclude_start,
976                             u64 exclude_nr, int data)
977 {
978         struct btrfs_path *path;
979         struct btrfs_key key;
980         int ret;
981         u64 hole_size = 0;
982         int slot = 0;
983         u64 last_block = 0;
984         u64 test_block;
985         u64 orig_search_start = search_start;
986         int start_found;
987         struct btrfs_leaf *l;
988         struct btrfs_root * root = orig_root->fs_info->extent_root;
989         struct btrfs_fs_info *info = root->fs_info;
990         int total_needed = num_blocks;
991         int level;
992         struct btrfs_block_group_cache *block_group;
993         int full_scan = 0;
994         int wrapped = 0;
995
996         WARN_ON(num_blocks < 1);
997         ins->flags = 0;
998         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
999
1000         level = btrfs_header_level(btrfs_buffer_header(root->node));
1001         if (search_end == (u64)-1)
1002                 search_end = btrfs_super_total_blocks(&info->super_copy);
1003         if (hint_block) {
1004                 block_group = btrfs_lookup_block_group(info, hint_block);
1005                 block_group = btrfs_find_block_group(root, block_group,
1006                                                      hint_block, data, 1);
1007         } else {
1008                 block_group = btrfs_find_block_group(root,
1009                                                      trans->block_group, 0,
1010                                                      data, 1);
1011         }
1012
1013         total_needed += empty_size;
1014         path = btrfs_alloc_path();
1015
1016 check_failed:
1017         if (!block_group->data)
1018                 search_start = find_search_start(root, &block_group,
1019                                                  search_start, total_needed);
1020         else if (!full_scan)
1021                 search_start = max(block_group->last_alloc, search_start);
1022
1023         btrfs_init_path(path);
1024         ins->objectid = search_start;
1025         ins->offset = 0;
1026         start_found = 0;
1027         path->reada = 2;
1028
1029         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1030         if (ret < 0)
1031                 goto error;
1032
1033         if (path->slots[0] > 0) {
1034                 path->slots[0]--;
1035         }
1036
1037         l = btrfs_buffer_leaf(path->nodes[0]);
1038         btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
1039         /*
1040          * a rare case, go back one key if we hit a block group item
1041          * instead of an extent item
1042          */
1043         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1044             key.objectid + key.offset >= search_start) {
1045                 ins->objectid = key.objectid;
1046                 ins->offset = key.offset - 1;
1047                 btrfs_release_path(root, path);
1048                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1049                 if (ret < 0)
1050                         goto error;
1051
1052                 if (path->slots[0] > 0) {
1053                         path->slots[0]--;
1054                 }
1055         }
1056
1057         while (1) {
1058                 l = btrfs_buffer_leaf(path->nodes[0]);
1059                 slot = path->slots[0];
1060                 if (slot >= btrfs_header_nritems(&l->header)) {
1061                         ret = btrfs_next_leaf(root, path);
1062                         if (ret == 0)
1063                                 continue;
1064                         if (ret < 0)
1065                                 goto error;
1066                         if (!start_found) {
1067                                 ins->objectid = search_start;
1068                                 ins->offset = search_end - search_start;
1069                                 start_found = 1;
1070                                 goto check_pending;
1071                         }
1072                         ins->objectid = last_block > search_start ?
1073                                         last_block : search_start;
1074                         ins->offset = search_end - ins->objectid;
1075                         goto check_pending;
1076                 }
1077
1078                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1079                 if (key.objectid >= search_start && key.objectid > last_block &&
1080                     start_found) {
1081                         if (last_block < search_start)
1082                                 last_block = search_start;
1083                         hole_size = key.objectid - last_block;
1084                         if (hole_size >= num_blocks) {
1085                                 ins->objectid = last_block;
1086                                 ins->offset = hole_size;
1087                                 goto check_pending;
1088                         }
1089                 }
1090
1091                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
1092                         goto next;
1093
1094                 start_found = 1;
1095                 last_block = key.objectid + key.offset;
1096                 if (!full_scan && last_block >= block_group->key.objectid +
1097                     block_group->key.offset) {
1098                         btrfs_release_path(root, path);
1099                         search_start = block_group->key.objectid +
1100                                 block_group->key.offset * 2;
1101                         goto new_group;
1102                 }
1103 next:
1104                 path->slots[0]++;
1105                 cond_resched();
1106         }
1107 check_pending:
1108         /* we have to make sure we didn't find an extent that has already
1109          * been allocated by the map tree or the original allocation
1110          */
1111         btrfs_release_path(root, path);
1112         BUG_ON(ins->objectid < search_start);
1113
1114         if (ins->objectid + num_blocks >= search_end)
1115                 goto enospc;
1116
1117         for (test_block = ins->objectid;
1118              test_block < ins->objectid + num_blocks; test_block++) {
1119                 if (test_radix_bit(&info->pinned_radix, test_block) ||
1120                     test_radix_bit(&info->extent_ins_radix, test_block)) {
1121                         search_start = test_block + 1;
1122                         goto new_group;
1123                 }
1124         }
1125         if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start &&
1126             ins->objectid < exclude_start + exclude_nr)) {
1127                 search_start = exclude_start + exclude_nr;
1128                 goto new_group;
1129         }
1130         if (!data) {
1131                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1132                 if (block_group)
1133                         trans->block_group = block_group;
1134         }
1135         ins->offset = num_blocks;
1136         btrfs_free_path(path);
1137         return 0;
1138
1139 new_group:
1140         if (search_start + num_blocks >= search_end) {
1141 enospc:
1142                 search_start = orig_search_start;
1143                 if (full_scan) {
1144                         ret = -ENOSPC;
1145                         goto error;
1146                 }
1147                 if (wrapped) {
1148                         if (!full_scan)
1149                                 total_needed -= empty_size;
1150                         full_scan = 1;
1151                 } else
1152                         wrapped = 1;
1153         }
1154         block_group = btrfs_lookup_block_group(info, search_start);
1155         cond_resched();
1156         if (!full_scan)
1157                 block_group = btrfs_find_block_group(root, block_group,
1158                                                      search_start, data, 0);
1159         goto check_failed;
1160
1161 error:
1162         btrfs_release_path(root, path);
1163         btrfs_free_path(path);
1164         return ret;
1165 }
1166 /*
1167  * finds a free extent and does all the dirty work required for allocation
1168  * returns the key for the extent through ins, and a tree buffer for
1169  * the first block of the extent through buf.
1170  *
1171  * returns 0 if everything worked, non-zero otherwise.
1172  */
1173 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1174                        struct btrfs_root *root, u64 owner,
1175                        u64 num_blocks, u64 empty_size, u64 hint_block,
1176                        u64 search_end, struct btrfs_key *ins, int data)
1177 {
1178         int ret;
1179         int pending_ret;
1180         u64 super_blocks_used, root_blocks_used;
1181         u64 search_start = 0;
1182         struct btrfs_fs_info *info = root->fs_info;
1183         struct btrfs_root *extent_root = info->extent_root;
1184         struct btrfs_extent_item extent_item;
1185
1186         btrfs_set_extent_refs(&extent_item, 1);
1187         btrfs_set_extent_owner(&extent_item, owner);
1188
1189         WARN_ON(num_blocks < 1);
1190         ret = find_free_extent(trans, root, num_blocks, empty_size,
1191                                search_start, search_end, hint_block, ins,
1192                                trans->alloc_exclude_start,
1193                                trans->alloc_exclude_nr, data);
1194         BUG_ON(ret);
1195         if (ret)
1196                 return ret;
1197
1198         /* block accounting for super block */
1199         super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
1200         btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used +
1201                                     num_blocks);
1202
1203         /* block accounting for root item */
1204         root_blocks_used = btrfs_root_blocks_used(&root->root_item);
1205         btrfs_set_root_blocks_used(&root->root_item, root_blocks_used +
1206                                    num_blocks);
1207
1208         if (root == extent_root) {
1209                 BUG_ON(num_blocks != 1);
1210                 set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid);
1211                 goto update_block;
1212         }
1213
1214         WARN_ON(trans->alloc_exclude_nr);
1215         trans->alloc_exclude_start = ins->objectid;
1216         trans->alloc_exclude_nr = ins->offset;
1217         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1218                                 sizeof(extent_item));
1219
1220         trans->alloc_exclude_start = 0;
1221         trans->alloc_exclude_nr = 0;
1222
1223         BUG_ON(ret);
1224         finish_current_insert(trans, extent_root);
1225         pending_ret = del_pending_extents(trans, extent_root);
1226         if (ret) {
1227                 return ret;
1228         }
1229         if (pending_ret) {
1230                 return pending_ret;
1231         }
1232
1233 update_block:
1234         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1235                                  data);
1236         BUG_ON(ret);
1237         return 0;
1238 }
1239
1240 /*
1241  * helper function to allocate a block for a given tree
1242  * returns the tree buffer or NULL.
1243  */
1244 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1245                                            struct btrfs_root *root, u64 hint,
1246                                            u64 empty_size)
1247 {
1248         struct btrfs_key ins;
1249         int ret;
1250         struct buffer_head *buf;
1251
1252         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1253                                  1, empty_size, hint, (u64)-1, &ins, 0);
1254         if (ret) {
1255                 BUG_ON(ret > 0);
1256                 return ERR_PTR(ret);
1257         }
1258         buf = btrfs_find_create_tree_block(root, ins.objectid);
1259         if (!buf) {
1260                 btrfs_free_extent(trans, root, ins.objectid, 1, 0);
1261                 return ERR_PTR(-ENOMEM);
1262         }
1263         WARN_ON(buffer_dirty(buf));
1264         set_buffer_uptodate(buf);
1265         set_buffer_checked(buf);
1266         set_buffer_defrag(buf);
1267         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1268         trans->blocks_used++;
1269         return buf;
1270 }
1271
1272 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1273                          struct btrfs_root *root, struct buffer_head *cur)
1274 {
1275         struct btrfs_disk_key *key;
1276         struct btrfs_leaf *leaf;
1277         struct btrfs_file_extent_item *fi;
1278         int i;
1279         int nritems;
1280         int ret;
1281
1282         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1283         leaf = btrfs_buffer_leaf(cur);
1284         nritems = btrfs_header_nritems(&leaf->header);
1285         for (i = 0; i < nritems; i++) {
1286                 u64 disk_blocknr;
1287                 key = &leaf->items[i].key;
1288                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1289                         continue;
1290                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1291                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1292                         continue;
1293                 /*
1294                  * FIXME make sure to insert a trans record that
1295                  * repeats the snapshot del on crash
1296                  */
1297                 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
1298                 if (disk_blocknr == 0)
1299                         continue;
1300                 ret = btrfs_free_extent(trans, root, disk_blocknr,
1301                                         btrfs_file_extent_disk_num_blocks(fi),
1302                                         0);
1303                 BUG_ON(ret);
1304         }
1305         return 0;
1306 }
1307
1308 static void reada_walk_down(struct btrfs_root *root,
1309                             struct btrfs_node *node)
1310 {
1311         int i;
1312         u32 nritems;
1313         u64 blocknr;
1314         int ret;
1315         u32 refs;
1316
1317         nritems = btrfs_header_nritems(&node->header);
1318         for (i = 0; i < nritems; i++) {
1319                 blocknr = btrfs_node_blockptr(node, i);
1320                 ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs);
1321                 BUG_ON(ret);
1322                 if (refs != 1)
1323                         continue;
1324                 mutex_unlock(&root->fs_info->fs_mutex);
1325                 ret = readahead_tree_block(root, blocknr);
1326                 cond_resched();
1327                 mutex_lock(&root->fs_info->fs_mutex);
1328                 if (ret)
1329                         break;
1330         }
1331 }
1332
1333 /*
1334  * helper function for drop_snapshot, this walks down the tree dropping ref
1335  * counts as it goes.
1336  */
1337 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1338                           *root, struct btrfs_path *path, int *level)
1339 {
1340         struct buffer_head *next;
1341         struct buffer_head *cur;
1342         u64 blocknr;
1343         int ret;
1344         u32 refs;
1345
1346         WARN_ON(*level < 0);
1347         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1348         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1349                                1, &refs);
1350         BUG_ON(ret);
1351         if (refs > 1)
1352                 goto out;
1353
1354         /*
1355          * walk down to the last node level and free all the leaves
1356          */
1357         while(*level >= 0) {
1358                 WARN_ON(*level < 0);
1359                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1360                 cur = path->nodes[*level];
1361
1362                 if (*level > 0 && path->slots[*level] == 0)
1363                         reada_walk_down(root, btrfs_buffer_node(cur));
1364
1365                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1366                         WARN_ON(1);
1367
1368                 if (path->slots[*level] >=
1369                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1370                         break;
1371                 if (*level == 0) {
1372                         ret = drop_leaf_ref(trans, root, cur);
1373                         BUG_ON(ret);
1374                         break;
1375                 }
1376                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1377                                               path->slots[*level]);
1378                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1379                 BUG_ON(ret);
1380                 if (refs != 1) {
1381                         path->slots[*level]++;
1382                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1383                         BUG_ON(ret);
1384                         continue;
1385                 }
1386                 next = btrfs_find_tree_block(root, blocknr);
1387                 if (!next || !buffer_uptodate(next)) {
1388                         brelse(next);
1389                         mutex_unlock(&root->fs_info->fs_mutex);
1390                         next = read_tree_block(root, blocknr);
1391                         mutex_lock(&root->fs_info->fs_mutex);
1392
1393                         /* we dropped the lock, check one more time */
1394                         ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1395                         BUG_ON(ret);
1396                         if (refs != 1) {
1397                                 path->slots[*level]++;
1398                                 brelse(next);
1399                                 ret = btrfs_free_extent(trans, root,
1400                                                         blocknr, 1, 1);
1401                                 BUG_ON(ret);
1402                                 continue;
1403                         }
1404                 }
1405                 WARN_ON(*level <= 0);
1406                 if (path->nodes[*level-1])
1407                         btrfs_block_release(root, path->nodes[*level-1]);
1408                 path->nodes[*level-1] = next;
1409                 *level = btrfs_header_level(btrfs_buffer_header(next));
1410                 path->slots[*level] = 0;
1411         }
1412 out:
1413         WARN_ON(*level < 0);
1414         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1415         ret = btrfs_free_extent(trans, root,
1416                                 bh_blocknr(path->nodes[*level]), 1, 1);
1417         btrfs_block_release(root, path->nodes[*level]);
1418         path->nodes[*level] = NULL;
1419         *level += 1;
1420         BUG_ON(ret);
1421         return 0;
1422 }
1423
1424 /*
1425  * helper for dropping snapshots.  This walks back up the tree in the path
1426  * to find the first node higher up where we haven't yet gone through
1427  * all the slots
1428  */
1429 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1430                         *root, struct btrfs_path *path, int *level)
1431 {
1432         int i;
1433         int slot;
1434         int ret;
1435         struct btrfs_root_item *root_item = &root->root_item;
1436
1437         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1438                 slot = path->slots[i];
1439                 if (slot < btrfs_header_nritems(
1440                     btrfs_buffer_header(path->nodes[i])) - 1) {
1441                         struct btrfs_node *node;
1442                         node = btrfs_buffer_node(path->nodes[i]);
1443                         path->slots[i]++;
1444                         *level = i;
1445                         WARN_ON(*level == 0);
1446                         memcpy(&root_item->drop_progress,
1447                                &node->ptrs[path->slots[i]].key,
1448                                sizeof(root_item->drop_progress));
1449                         root_item->drop_level = i;
1450                         return 0;
1451                 } else {
1452                         ret = btrfs_free_extent(trans, root,
1453                                                 bh_blocknr(path->nodes[*level]),
1454                                                 1, 1);
1455                         BUG_ON(ret);
1456                         btrfs_block_release(root, path->nodes[*level]);
1457                         path->nodes[*level] = NULL;
1458                         *level = i + 1;
1459                 }
1460         }
1461         return 1;
1462 }
1463
1464 /*
1465  * drop the reference count on the tree rooted at 'snap'.  This traverses
1466  * the tree freeing any blocks that have a ref count of zero after being
1467  * decremented.
1468  */
1469 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1470                         *root)
1471 {
1472         int ret = 0;
1473         int wret;
1474         int level;
1475         struct btrfs_path *path;
1476         int i;
1477         int orig_level;
1478         struct btrfs_root_item *root_item = &root->root_item;
1479
1480         path = btrfs_alloc_path();
1481         BUG_ON(!path);
1482
1483         level = btrfs_header_level(btrfs_buffer_header(root->node));
1484         orig_level = level;
1485         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1486                 path->nodes[level] = root->node;
1487                 path->slots[level] = 0;
1488         } else {
1489                 struct btrfs_key key;
1490                 struct btrfs_disk_key *found_key;
1491                 struct btrfs_node *node;
1492
1493                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1494                 level = root_item->drop_level;
1495                 path->lowest_level = level;
1496                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1497                 if (wret < 0) {
1498                         ret = wret;
1499                         goto out;
1500                 }
1501                 node = btrfs_buffer_node(path->nodes[level]);
1502                 found_key = &node->ptrs[path->slots[level]].key;
1503                 WARN_ON(memcmp(found_key, &root_item->drop_progress,
1504                                sizeof(*found_key)));
1505         }
1506         while(1) {
1507                 wret = walk_down_tree(trans, root, path, &level);
1508                 if (wret > 0)
1509                         break;
1510                 if (wret < 0)
1511                         ret = wret;
1512
1513                 wret = walk_up_tree(trans, root, path, &level);
1514                 if (wret > 0)
1515                         break;
1516                 if (wret < 0)
1517                         ret = wret;
1518                 ret = -EAGAIN;
1519                 get_bh(root->node);
1520                 break;
1521         }
1522         for (i = 0; i <= orig_level; i++) {
1523                 if (path->nodes[i]) {
1524                         btrfs_block_release(root, path->nodes[i]);
1525                         path->nodes[i] = 0;
1526                 }
1527         }
1528 out:
1529         btrfs_free_path(path);
1530         return ret;
1531 }
1532
1533 static int free_block_group_radix(struct radix_tree_root *radix)
1534 {
1535         int ret;
1536         struct btrfs_block_group_cache *cache[8];
1537         int i;
1538
1539         while(1) {
1540                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1541                                              ARRAY_SIZE(cache));
1542                 if (!ret)
1543                         break;
1544                 for (i = 0; i < ret; i++) {
1545                         radix_tree_delete(radix, cache[i]->key.objectid +
1546                                           cache[i]->key.offset - 1);
1547                         kfree(cache[i]);
1548                 }
1549         }
1550         return 0;
1551 }
1552
1553 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1554 {
1555         int ret;
1556         int ret2;
1557         unsigned long gang[16];
1558         int i;
1559
1560         ret = free_block_group_radix(&info->block_group_radix);
1561         ret2 = free_block_group_radix(&info->block_group_data_radix);
1562         if (ret)
1563                 return ret;
1564         if (ret2)
1565                 return ret2;
1566
1567         while(1) {
1568                 ret = find_first_radix_bit(&info->extent_map_radix,
1569                                            gang, 0, ARRAY_SIZE(gang));
1570                 if (!ret)
1571                         break;
1572                 for (i = 0; i < ret; i++) {
1573                         clear_radix_bit(&info->extent_map_radix, gang[i]);
1574                 }
1575         }
1576         return 0;
1577 }
1578
1579 int btrfs_read_block_groups(struct btrfs_root *root)
1580 {
1581         struct btrfs_path *path;
1582         int ret;
1583         int err = 0;
1584         struct btrfs_block_group_item *bi;
1585         struct btrfs_block_group_cache *cache;
1586         struct btrfs_fs_info *info = root->fs_info;
1587         struct radix_tree_root *radix;
1588         struct btrfs_key key;
1589         struct btrfs_key found_key;
1590         struct btrfs_leaf *leaf;
1591         u64 group_size_blocks;
1592         u64 used;
1593
1594         group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
1595                 root->fs_info->sb->s_blocksize_bits;
1596         root = info->extent_root;
1597         key.objectid = 0;
1598         key.offset = group_size_blocks;
1599         key.flags = 0;
1600         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1601
1602         path = btrfs_alloc_path();
1603         if (!path)
1604                 return -ENOMEM;
1605
1606         while(1) {
1607                 ret = btrfs_search_slot(NULL, info->extent_root,
1608                                         &key, path, 0, 0);
1609                 if (ret != 0) {
1610                         err = ret;
1611                         break;
1612                 }
1613                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1614                 btrfs_disk_key_to_cpu(&found_key,
1615                                       &leaf->items[path->slots[0]].key);
1616                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1617                 if (!cache) {
1618                         err = -1;
1619                         break;
1620                 }
1621
1622                 bi = btrfs_item_ptr(leaf, path->slots[0],
1623                                     struct btrfs_block_group_item);
1624                 if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
1625                         radix = &info->block_group_data_radix;
1626                         cache->data = 1;
1627                 } else {
1628                         radix = &info->block_group_radix;
1629                         cache->data = 0;
1630                 }
1631
1632                 memcpy(&cache->item, bi, sizeof(*bi));
1633                 memcpy(&cache->key, &found_key, sizeof(found_key));
1634                 cache->last_alloc = cache->key.objectid;
1635                 cache->first_free = cache->key.objectid;
1636                 cache->pinned = 0;
1637                 cache->cached = 0;
1638
1639                 cache->radix = radix;
1640
1641                 key.objectid = found_key.objectid + found_key.offset;
1642                 btrfs_release_path(root, path);
1643                 ret = radix_tree_insert(radix, found_key.objectid +
1644                                         found_key.offset - 1,
1645                                         (void *)cache);
1646                 BUG_ON(ret);
1647                 used = btrfs_block_group_used(bi);
1648                 if (used < div_factor(key.offset, 8)) {
1649                         radix_tree_tag_set(radix, found_key.objectid +
1650                                            found_key.offset - 1,
1651                                            BTRFS_BLOCK_GROUP_AVAIL);
1652                 }
1653                 if (key.objectid >=
1654                     btrfs_super_total_blocks(&info->super_copy))
1655                         break;
1656         }
1657
1658         btrfs_free_path(path);
1659         return 0;
1660 }