Btrfs: Properly find the root for snapshotted blocks during chunk relocation
[safe/jmp/linux-2.6] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include "hash.h"
22 #include "crc32c.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28
29 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
30 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
31 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
32
33 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
34
35 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
36                                  btrfs_root *extent_root);
37 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
38                                btrfs_root *extent_root);
39
40
41 static int cache_block_group(struct btrfs_root *root,
42                              struct btrfs_block_group_cache *block_group)
43 {
44         struct btrfs_path *path;
45         int ret;
46         struct btrfs_key key;
47         struct extent_buffer *leaf;
48         struct extent_io_tree *free_space_cache;
49         int slot;
50         u64 last = 0;
51         u64 hole_size;
52         u64 first_free;
53         int found = 0;
54
55         if (!block_group)
56                 return 0;
57
58         root = root->fs_info->extent_root;
59         free_space_cache = &root->fs_info->free_space_cache;
60
61         if (block_group->cached)
62                 return 0;
63
64         path = btrfs_alloc_path();
65         if (!path)
66                 return -ENOMEM;
67
68         path->reada = 2;
69         first_free = block_group->key.objectid;
70         key.objectid = block_group->key.objectid;
71         key.offset = 0;
72         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
73         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
74         if (ret < 0)
75                 return ret;
76         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
77         if (ret < 0)
78                 return ret;
79         if (ret == 0) {
80                 leaf = path->nodes[0];
81                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
82                 if (key.objectid + key.offset > first_free)
83                         first_free = key.objectid + key.offset;
84         }
85         while(1) {
86                 leaf = path->nodes[0];
87                 slot = path->slots[0];
88                 if (slot >= btrfs_header_nritems(leaf)) {
89                         ret = btrfs_next_leaf(root, path);
90                         if (ret < 0)
91                                 goto err;
92                         if (ret == 0) {
93                                 continue;
94                         } else {
95                                 break;
96                         }
97                 }
98                 btrfs_item_key_to_cpu(leaf, &key, slot);
99                 if (key.objectid < block_group->key.objectid) {
100                         goto next;
101                 }
102                 if (key.objectid >= block_group->key.objectid +
103                     block_group->key.offset) {
104                         break;
105                 }
106
107                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
108                         if (!found) {
109                                 last = first_free;
110                                 found = 1;
111                         }
112                         if (key.objectid > last) {
113                                 hole_size = key.objectid - last;
114                                 set_extent_dirty(free_space_cache, last,
115                                                  last + hole_size - 1,
116                                                  GFP_NOFS);
117                         }
118                         last = key.objectid + key.offset;
119                 }
120 next:
121                 path->slots[0]++;
122         }
123
124         if (!found)
125                 last = first_free;
126         if (block_group->key.objectid +
127             block_group->key.offset > last) {
128                 hole_size = block_group->key.objectid +
129                         block_group->key.offset - last;
130                 set_extent_dirty(free_space_cache, last,
131                                  last + hole_size - 1, GFP_NOFS);
132         }
133         block_group->cached = 1;
134 err:
135         btrfs_free_path(path);
136         return 0;
137 }
138
139 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
140                                                          btrfs_fs_info *info,
141                                                          u64 bytenr)
142 {
143         struct extent_io_tree *block_group_cache;
144         struct btrfs_block_group_cache *block_group = NULL;
145         u64 ptr;
146         u64 start;
147         u64 end;
148         int ret;
149
150         bytenr = max_t(u64, bytenr,
151                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
152         block_group_cache = &info->block_group_cache;
153         ret = find_first_extent_bit(block_group_cache,
154                                     bytenr, &start, &end,
155                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
156                                     BLOCK_GROUP_SYSTEM);
157         if (ret) {
158                 return NULL;
159         }
160         ret = get_state_private(block_group_cache, start, &ptr);
161         if (ret)
162                 return NULL;
163
164         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
165         if (block_group->key.objectid <= bytenr && bytenr <
166             block_group->key.objectid + block_group->key.offset)
167                 return block_group;
168         return NULL;
169 }
170
171 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
172 {
173         return (cache->flags & bits) == bits;
174 }
175
176 static int noinline find_search_start(struct btrfs_root *root,
177                               struct btrfs_block_group_cache **cache_ret,
178                               u64 *start_ret, int num, int data)
179 {
180         int ret;
181         struct btrfs_block_group_cache *cache = *cache_ret;
182         struct extent_io_tree *free_space_cache;
183         struct extent_state *state;
184         u64 last;
185         u64 start = 0;
186         u64 cache_miss = 0;
187         u64 total_fs_bytes;
188         u64 search_start = *start_ret;
189         int wrapped = 0;
190
191         if (!cache)
192                 goto out;
193
194         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
195         free_space_cache = &root->fs_info->free_space_cache;
196
197 again:
198         ret = cache_block_group(root, cache);
199         if (ret)
200                 goto out;
201
202         last = max(search_start, cache->key.objectid);
203         if (!block_group_bits(cache, data) || cache->ro) {
204                 goto new_group;
205         }
206
207         spin_lock_irq(&free_space_cache->lock);
208         state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
209         while(1) {
210                 if (!state) {
211                         if (!cache_miss)
212                                 cache_miss = last;
213                         spin_unlock_irq(&free_space_cache->lock);
214                         goto new_group;
215                 }
216
217                 start = max(last, state->start);
218                 last = state->end + 1;
219                 if (last - start < num) {
220                         if (last == cache->key.objectid + cache->key.offset)
221                                 cache_miss = start;
222                         do {
223                                 state = extent_state_next(state);
224                         } while(state && !(state->state & EXTENT_DIRTY));
225                         continue;
226                 }
227                 spin_unlock_irq(&free_space_cache->lock);
228                 if (cache->ro)
229                         goto new_group;
230                 if (start + num > cache->key.objectid + cache->key.offset)
231                         goto new_group;
232                 if (start + num  > total_fs_bytes)
233                         goto new_group;
234                 if (!block_group_bits(cache, data)) {
235                         printk("block group bits don't match %Lu %d\n", cache->flags, data);
236                 }
237                 *start_ret = start;
238                 return 0;
239         }
240 out:
241         cache = btrfs_lookup_block_group(root->fs_info, search_start);
242         if (!cache) {
243                 printk("Unable to find block group for %Lu\n", search_start);
244                 WARN_ON(1);
245         }
246         return -ENOSPC;
247
248 new_group:
249         last = cache->key.objectid + cache->key.offset;
250 wrapped:
251         cache = btrfs_lookup_block_group(root->fs_info, last);
252         if (!cache || cache->key.objectid >= total_fs_bytes) {
253 no_cache:
254                 if (!wrapped) {
255                         wrapped = 1;
256                         last = search_start;
257                         goto wrapped;
258                 }
259                 goto out;
260         }
261         if (cache_miss && !cache->cached) {
262                 cache_block_group(root, cache);
263                 last = cache_miss;
264                 cache = btrfs_lookup_block_group(root->fs_info, last);
265         }
266         cache = btrfs_find_block_group(root, cache, last, data, 0);
267         if (!cache)
268                 goto no_cache;
269         *cache_ret = cache;
270         cache_miss = 0;
271         goto again;
272 }
273
274 static u64 div_factor(u64 num, int factor)
275 {
276         if (factor == 10)
277                 return num;
278         num *= factor;
279         do_div(num, 10);
280         return num;
281 }
282
283 static int block_group_state_bits(u64 flags)
284 {
285         int bits = 0;
286         if (flags & BTRFS_BLOCK_GROUP_DATA)
287                 bits |= BLOCK_GROUP_DATA;
288         if (flags & BTRFS_BLOCK_GROUP_METADATA)
289                 bits |= BLOCK_GROUP_METADATA;
290         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
291                 bits |= BLOCK_GROUP_SYSTEM;
292         return bits;
293 }
294
295 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
296                                                  struct btrfs_block_group_cache
297                                                  *hint, u64 search_start,
298                                                  int data, int owner)
299 {
300         struct btrfs_block_group_cache *cache;
301         struct extent_io_tree *block_group_cache;
302         struct btrfs_block_group_cache *found_group = NULL;
303         struct btrfs_fs_info *info = root->fs_info;
304         u64 used;
305         u64 last = 0;
306         u64 hint_last;
307         u64 start;
308         u64 end;
309         u64 free_check;
310         u64 ptr;
311         u64 total_fs_bytes;
312         int bit;
313         int ret;
314         int full_search = 0;
315         int factor = 10;
316
317         block_group_cache = &info->block_group_cache;
318         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
319
320         if (data & BTRFS_BLOCK_GROUP_METADATA)
321                 factor = 9;
322
323         bit = block_group_state_bits(data);
324
325         if (search_start && search_start < total_fs_bytes) {
326                 struct btrfs_block_group_cache *shint;
327                 shint = btrfs_lookup_block_group(info, search_start);
328                 if (shint && block_group_bits(shint, data) && !shint->ro) {
329                         used = btrfs_block_group_used(&shint->item);
330                         if (used + shint->pinned <
331                             div_factor(shint->key.offset, factor)) {
332                                 return shint;
333                         }
334                 }
335         }
336         if (hint && !hint->ro && block_group_bits(hint, data) &&
337             hint->key.objectid < total_fs_bytes) {
338                 used = btrfs_block_group_used(&hint->item);
339                 if (used + hint->pinned <
340                     div_factor(hint->key.offset, factor)) {
341                         return hint;
342                 }
343                 last = hint->key.objectid + hint->key.offset;
344                 hint_last = last;
345         } else {
346                 if (hint)
347                         hint_last = max(hint->key.objectid, search_start);
348                 else
349                         hint_last = search_start;
350
351                 if (hint_last >= total_fs_bytes)
352                         hint_last = search_start;
353                 last = hint_last;
354         }
355 again:
356         while(1) {
357                 ret = find_first_extent_bit(block_group_cache, last,
358                                             &start, &end, bit);
359                 if (ret)
360                         break;
361
362                 ret = get_state_private(block_group_cache, start, &ptr);
363                 if (ret)
364                         break;
365
366                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
367                 last = cache->key.objectid + cache->key.offset;
368                 used = btrfs_block_group_used(&cache->item);
369
370                 if (cache->key.objectid > total_fs_bytes)
371                         break;
372
373                 if (!cache->ro && block_group_bits(cache, data)) {
374                         if (full_search)
375                                 free_check = cache->key.offset;
376                         else
377                                 free_check = div_factor(cache->key.offset,
378                                                         factor);
379
380                         if (used + cache->pinned < free_check) {
381                                 found_group = cache;
382                                 goto found;
383                         }
384                 }
385                 cond_resched();
386         }
387         if (!full_search) {
388                 last = search_start;
389                 full_search = 1;
390                 goto again;
391         }
392 found:
393         return found_group;
394 }
395
396 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
397                            u64 owner, u64 owner_offset)
398 {
399         u32 high_crc = ~(u32)0;
400         u32 low_crc = ~(u32)0;
401         __le64 lenum;
402         lenum = cpu_to_le64(root_objectid);
403         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
404         lenum = cpu_to_le64(ref_generation);
405         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
406         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
407                 lenum = cpu_to_le64(owner);
408                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
409                 lenum = cpu_to_le64(owner_offset);
410                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
411         }
412         return ((u64)high_crc << 32) | (u64)low_crc;
413 }
414
415 static int match_extent_ref(struct extent_buffer *leaf,
416                             struct btrfs_extent_ref *disk_ref,
417                             struct btrfs_extent_ref *cpu_ref)
418 {
419         int ret;
420         int len;
421
422         if (cpu_ref->objectid)
423                 len = sizeof(*cpu_ref);
424         else
425                 len = 2 * sizeof(u64);
426         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
427                                    len);
428         return ret == 0;
429 }
430
431 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
432                                           struct btrfs_root *root,
433                                           struct btrfs_path *path, u64 bytenr,
434                                           u64 root_objectid,
435                                           u64 ref_generation, u64 owner,
436                                           u64 owner_offset, int del)
437 {
438         u64 hash;
439         struct btrfs_key key;
440         struct btrfs_key found_key;
441         struct btrfs_extent_ref ref;
442         struct extent_buffer *leaf;
443         struct btrfs_extent_ref *disk_ref;
444         int ret;
445         int ret2;
446
447         btrfs_set_stack_ref_root(&ref, root_objectid);
448         btrfs_set_stack_ref_generation(&ref, ref_generation);
449         btrfs_set_stack_ref_objectid(&ref, owner);
450         btrfs_set_stack_ref_offset(&ref, owner_offset);
451
452         hash = hash_extent_ref(root_objectid, ref_generation, owner,
453                                owner_offset);
454         key.offset = hash;
455         key.objectid = bytenr;
456         key.type = BTRFS_EXTENT_REF_KEY;
457
458         while (1) {
459                 ret = btrfs_search_slot(trans, root, &key, path,
460                                         del ? -1 : 0, del);
461                 if (ret < 0)
462                         goto out;
463                 leaf = path->nodes[0];
464                 if (ret != 0) {
465                         u32 nritems = btrfs_header_nritems(leaf);
466                         if (path->slots[0] >= nritems) {
467                                 ret2 = btrfs_next_leaf(root, path);
468                                 if (ret2)
469                                         goto out;
470                                 leaf = path->nodes[0];
471                         }
472                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
473                         if (found_key.objectid != bytenr ||
474                             found_key.type != BTRFS_EXTENT_REF_KEY)
475                                 goto out;
476                         key.offset = found_key.offset;
477                         if (del) {
478                                 btrfs_release_path(root, path);
479                                 continue;
480                         }
481                 }
482                 disk_ref = btrfs_item_ptr(path->nodes[0],
483                                           path->slots[0],
484                                           struct btrfs_extent_ref);
485                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
486                         ret = 0;
487                         goto out;
488                 }
489                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
490                 key.offset = found_key.offset + 1;
491                 btrfs_release_path(root, path);
492         }
493 out:
494         return ret;
495 }
496
497 /*
498  * Back reference rules.  Back refs have three main goals:
499  *
500  * 1) differentiate between all holders of references to an extent so that
501  *    when a reference is dropped we can make sure it was a valid reference
502  *    before freeing the extent.
503  *
504  * 2) Provide enough information to quickly find the holders of an extent
505  *    if we notice a given block is corrupted or bad.
506  *
507  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
508  *    maintenance.  This is actually the same as #2, but with a slightly
509  *    different use case.
510  *
511  * File extents can be referenced by:
512  *
513  * - multiple snapshots, subvolumes, or different generations in one subvol
514  * - different files inside a single subvolume (in theory, not implemented yet)
515  * - different offsets inside a file (bookend extents in file.c)
516  *
517  * The extent ref structure has fields for:
518  *
519  * - Objectid of the subvolume root
520  * - Generation number of the tree holding the reference
521  * - objectid of the file holding the reference
522  * - offset in the file corresponding to the key holding the reference
523  *
524  * When a file extent is allocated the fields are filled in:
525  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
526  *
527  * When a leaf is cow'd new references are added for every file extent found
528  * in the leaf.  It looks the same as the create case, but trans->transid
529  * will be different when the block is cow'd.
530  *
531  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
532  *
533  * When a file extent is removed either during snapshot deletion or file
534  * truncation, the corresponding back reference is found
535  * by searching for:
536  *
537  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
538  *      inode objectid, offset in file)
539  *
540  * Btree extents can be referenced by:
541  *
542  * - Different subvolumes
543  * - Different generations of the same subvolume
544  *
545  * Storing sufficient information for a full reverse mapping of a btree
546  * block would require storing the lowest key of the block in the backref,
547  * and it would require updating that lowest key either before write out or
548  * every time it changed.  Instead, the objectid of the lowest key is stored
549  * along with the level of the tree block.  This provides a hint
550  * about where in the btree the block can be found.  Searches through the
551  * btree only need to look for a pointer to that block, so they stop one
552  * level higher than the level recorded in the backref.
553  *
554  * Some btrees do not do reference counting on their extents.  These
555  * include the extent tree and the tree of tree roots.  Backrefs for these
556  * trees always have a generation of zero.
557  *
558  * When a tree block is created, back references are inserted:
559  *
560  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
561  *
562  * When a tree block is cow'd in a reference counted root,
563  * new back references are added for all the blocks it points to.
564  * These are of the form (trans->transid will have increased since creation):
565  *
566  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
567  *
568  * Because the lowest_key_objectid and the level are just hints
569  * they are not used when backrefs are deleted.  When a backref is deleted:
570  *
571  * if backref was for a tree root:
572  *     root_objectid = root->root_key.objectid
573  * else
574  *     root_objectid = btrfs_header_owner(parent)
575  *
576  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
577  *
578  * Back Reference Key hashing:
579  *
580  * Back references have four fields, each 64 bits long.  Unfortunately,
581  * This is hashed into a single 64 bit number and placed into the key offset.
582  * The key objectid corresponds to the first byte in the extent, and the
583  * key type is set to BTRFS_EXTENT_REF_KEY
584  */
585 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
586                                  struct btrfs_root *root,
587                                  struct btrfs_path *path, u64 bytenr,
588                                  u64 root_objectid, u64 ref_generation,
589                                  u64 owner, u64 owner_offset)
590 {
591         u64 hash;
592         struct btrfs_key key;
593         struct btrfs_extent_ref ref;
594         struct btrfs_extent_ref *disk_ref;
595         int ret;
596
597         btrfs_set_stack_ref_root(&ref, root_objectid);
598         btrfs_set_stack_ref_generation(&ref, ref_generation);
599         btrfs_set_stack_ref_objectid(&ref, owner);
600         btrfs_set_stack_ref_offset(&ref, owner_offset);
601
602         hash = hash_extent_ref(root_objectid, ref_generation, owner,
603                                owner_offset);
604         key.offset = hash;
605         key.objectid = bytenr;
606         key.type = BTRFS_EXTENT_REF_KEY;
607
608         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
609         while (ret == -EEXIST) {
610                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
611                                           struct btrfs_extent_ref);
612                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
613                         goto out;
614                 key.offset++;
615                 btrfs_release_path(root, path);
616                 ret = btrfs_insert_empty_item(trans, root, path, &key,
617                                               sizeof(ref));
618         }
619         if (ret)
620                 goto out;
621         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
622                                   struct btrfs_extent_ref);
623         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
624                             sizeof(ref));
625         btrfs_mark_buffer_dirty(path->nodes[0]);
626 out:
627         btrfs_release_path(root, path);
628         return ret;
629 }
630
631 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
632                                 struct btrfs_root *root,
633                                 u64 bytenr, u64 num_bytes,
634                                 u64 root_objectid, u64 ref_generation,
635                                 u64 owner, u64 owner_offset)
636 {
637         struct btrfs_path *path;
638         int ret;
639         struct btrfs_key key;
640         struct extent_buffer *l;
641         struct btrfs_extent_item *item;
642         u32 refs;
643
644         WARN_ON(num_bytes < root->sectorsize);
645         path = btrfs_alloc_path();
646         if (!path)
647                 return -ENOMEM;
648
649         path->reada = 1;
650         key.objectid = bytenr;
651         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
652         key.offset = num_bytes;
653         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
654                                 0, 1);
655         if (ret < 0)
656                 return ret;
657         if (ret != 0) {
658                 BUG();
659         }
660         BUG_ON(ret != 0);
661         l = path->nodes[0];
662         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
663         refs = btrfs_extent_refs(l, item);
664         btrfs_set_extent_refs(l, item, refs + 1);
665         btrfs_mark_buffer_dirty(path->nodes[0]);
666
667         btrfs_release_path(root->fs_info->extent_root, path);
668
669         path->reada = 1;
670         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
671                                           path, bytenr, root_objectid,
672                                           ref_generation, owner, owner_offset);
673         BUG_ON(ret);
674         finish_current_insert(trans, root->fs_info->extent_root);
675         del_pending_extents(trans, root->fs_info->extent_root);
676
677         btrfs_free_path(path);
678         return 0;
679 }
680
681 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
682                          struct btrfs_root *root)
683 {
684         finish_current_insert(trans, root->fs_info->extent_root);
685         del_pending_extents(trans, root->fs_info->extent_root);
686         return 0;
687 }
688
689 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
690                              struct btrfs_root *root, u64 bytenr,
691                              u64 num_bytes, u32 *refs)
692 {
693         struct btrfs_path *path;
694         int ret;
695         struct btrfs_key key;
696         struct extent_buffer *l;
697         struct btrfs_extent_item *item;
698
699         WARN_ON(num_bytes < root->sectorsize);
700         path = btrfs_alloc_path();
701         path->reada = 1;
702         key.objectid = bytenr;
703         key.offset = num_bytes;
704         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
705         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
706                                 0, 0);
707         if (ret < 0)
708                 goto out;
709         if (ret != 0) {
710                 btrfs_print_leaf(root, path->nodes[0]);
711                 printk("failed to find block number %Lu\n", bytenr);
712                 BUG();
713         }
714         l = path->nodes[0];
715         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
716         *refs = btrfs_extent_refs(l, item);
717 out:
718         btrfs_free_path(path);
719         return 0;
720 }
721
722 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
723                                   struct btrfs_path *count_path,
724                                   u64 first_extent)
725 {
726         struct btrfs_root *extent_root = root->fs_info->extent_root;
727         struct btrfs_path *path;
728         u64 bytenr;
729         u64 found_objectid;
730         u64 root_objectid = root->root_key.objectid;
731         u32 total_count = 0;
732         u32 cur_count;
733         u32 nritems;
734         int ret;
735         struct btrfs_key key;
736         struct btrfs_key found_key;
737         struct extent_buffer *l;
738         struct btrfs_extent_item *item;
739         struct btrfs_extent_ref *ref_item;
740         int level = -1;
741
742         path = btrfs_alloc_path();
743 again:
744         if (level == -1)
745                 bytenr = first_extent;
746         else
747                 bytenr = count_path->nodes[level]->start;
748
749         cur_count = 0;
750         key.objectid = bytenr;
751         key.offset = 0;
752
753         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
754         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
755         if (ret < 0)
756                 goto out;
757         BUG_ON(ret == 0);
758
759         l = path->nodes[0];
760         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
761
762         if (found_key.objectid != bytenr ||
763             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
764                 goto out;
765         }
766
767         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
768         while (1) {
769                 l = path->nodes[0];
770                 nritems = btrfs_header_nritems(l);
771                 if (path->slots[0] >= nritems) {
772                         ret = btrfs_next_leaf(extent_root, path);
773                         if (ret == 0)
774                                 continue;
775                         break;
776                 }
777                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
778                 if (found_key.objectid != bytenr)
779                         break;
780
781                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
782                         path->slots[0]++;
783                         continue;
784                 }
785
786                 cur_count++;
787                 ref_item = btrfs_item_ptr(l, path->slots[0],
788                                           struct btrfs_extent_ref);
789                 found_objectid = btrfs_ref_root(l, ref_item);
790
791                 if (found_objectid != root_objectid) {
792                         total_count = 2;
793                         goto out;
794                 }
795                 total_count = 1;
796                 path->slots[0]++;
797         }
798         if (cur_count == 0) {
799                 total_count = 0;
800                 goto out;
801         }
802         if (level >= 0 && root->node == count_path->nodes[level])
803                 goto out;
804         level++;
805         btrfs_release_path(root, path);
806         goto again;
807
808 out:
809         btrfs_free_path(path);
810         return total_count;
811 }
812 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
813                        struct btrfs_root *root, u64 owner_objectid)
814 {
815         u64 generation;
816         u64 key_objectid;
817         u64 level;
818         u32 nritems;
819         struct btrfs_disk_key disk_key;
820
821         level = btrfs_header_level(root->node);
822         generation = trans->transid;
823         nritems = btrfs_header_nritems(root->node);
824         if (nritems > 0) {
825                 if (level == 0)
826                         btrfs_item_key(root->node, &disk_key, 0);
827                 else
828                         btrfs_node_key(root->node, &disk_key, 0);
829                 key_objectid = btrfs_disk_key_objectid(&disk_key);
830         } else {
831                 key_objectid = 0;
832         }
833         return btrfs_inc_extent_ref(trans, root, root->node->start,
834                                     root->node->len, owner_objectid,
835                                     generation, level, key_objectid);
836 }
837
838 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
839                   struct extent_buffer *buf)
840 {
841         u64 bytenr;
842         u32 nritems;
843         struct btrfs_key key;
844         struct btrfs_file_extent_item *fi;
845         int i;
846         int level;
847         int ret;
848         int faili;
849
850         if (!root->ref_cows)
851                 return 0;
852
853         level = btrfs_header_level(buf);
854         nritems = btrfs_header_nritems(buf);
855         for (i = 0; i < nritems; i++) {
856                 if (level == 0) {
857                         u64 disk_bytenr;
858                         btrfs_item_key_to_cpu(buf, &key, i);
859                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
860                                 continue;
861                         fi = btrfs_item_ptr(buf, i,
862                                             struct btrfs_file_extent_item);
863                         if (btrfs_file_extent_type(buf, fi) ==
864                             BTRFS_FILE_EXTENT_INLINE)
865                                 continue;
866                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
867                         if (disk_bytenr == 0)
868                                 continue;
869                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
870                                     btrfs_file_extent_disk_num_bytes(buf, fi),
871                                     root->root_key.objectid, trans->transid,
872                                     key.objectid, key.offset);
873                         if (ret) {
874                                 faili = i;
875                                 goto fail;
876                         }
877                 } else {
878                         bytenr = btrfs_node_blockptr(buf, i);
879                         btrfs_node_key_to_cpu(buf, &key, i);
880                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
881                                            btrfs_level_size(root, level - 1),
882                                            root->root_key.objectid,
883                                            trans->transid,
884                                            level - 1, key.objectid);
885                         if (ret) {
886                                 faili = i;
887                                 goto fail;
888                         }
889                 }
890         }
891         return 0;
892 fail:
893         WARN_ON(1);
894 #if 0
895         for (i =0; i < faili; i++) {
896                 if (level == 0) {
897                         u64 disk_bytenr;
898                         btrfs_item_key_to_cpu(buf, &key, i);
899                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
900                                 continue;
901                         fi = btrfs_item_ptr(buf, i,
902                                             struct btrfs_file_extent_item);
903                         if (btrfs_file_extent_type(buf, fi) ==
904                             BTRFS_FILE_EXTENT_INLINE)
905                                 continue;
906                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
907                         if (disk_bytenr == 0)
908                                 continue;
909                         err = btrfs_free_extent(trans, root, disk_bytenr,
910                                     btrfs_file_extent_disk_num_bytes(buf,
911                                                                       fi), 0);
912                         BUG_ON(err);
913                 } else {
914                         bytenr = btrfs_node_blockptr(buf, i);
915                         err = btrfs_free_extent(trans, root, bytenr,
916                                         btrfs_level_size(root, level - 1), 0);
917                         BUG_ON(err);
918                 }
919         }
920 #endif
921         return ret;
922 }
923
924 static int write_one_cache_group(struct btrfs_trans_handle *trans,
925                                  struct btrfs_root *root,
926                                  struct btrfs_path *path,
927                                  struct btrfs_block_group_cache *cache)
928 {
929         int ret;
930         int pending_ret;
931         struct btrfs_root *extent_root = root->fs_info->extent_root;
932         unsigned long bi;
933         struct extent_buffer *leaf;
934
935         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
936         if (ret < 0)
937                 goto fail;
938         BUG_ON(ret);
939
940         leaf = path->nodes[0];
941         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
942         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
943         btrfs_mark_buffer_dirty(leaf);
944         btrfs_release_path(extent_root, path);
945 fail:
946         finish_current_insert(trans, extent_root);
947         pending_ret = del_pending_extents(trans, extent_root);
948         if (ret)
949                 return ret;
950         if (pending_ret)
951                 return pending_ret;
952         return 0;
953
954 }
955
956 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
957                                    struct btrfs_root *root)
958 {
959         struct extent_io_tree *block_group_cache;
960         struct btrfs_block_group_cache *cache;
961         int ret;
962         int err = 0;
963         int werr = 0;
964         struct btrfs_path *path;
965         u64 last = 0;
966         u64 start;
967         u64 end;
968         u64 ptr;
969
970         block_group_cache = &root->fs_info->block_group_cache;
971         path = btrfs_alloc_path();
972         if (!path)
973                 return -ENOMEM;
974
975         while(1) {
976                 ret = find_first_extent_bit(block_group_cache, last,
977                                             &start, &end, BLOCK_GROUP_DIRTY);
978                 if (ret)
979                         break;
980
981                 last = end + 1;
982                 ret = get_state_private(block_group_cache, start, &ptr);
983                 if (ret)
984                         break;
985                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
986                 err = write_one_cache_group(trans, root,
987                                             path, cache);
988                 /*
989                  * if we fail to write the cache group, we want
990                  * to keep it marked dirty in hopes that a later
991                  * write will work
992                  */
993                 if (err) {
994                         werr = err;
995                         continue;
996                 }
997                 clear_extent_bits(block_group_cache, start, end,
998                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
999         }
1000         btrfs_free_path(path);
1001         return werr;
1002 }
1003
1004 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1005                                                   u64 flags)
1006 {
1007         struct list_head *head = &info->space_info;
1008         struct list_head *cur;
1009         struct btrfs_space_info *found;
1010         list_for_each(cur, head) {
1011                 found = list_entry(cur, struct btrfs_space_info, list);
1012                 if (found->flags == flags)
1013                         return found;
1014         }
1015         return NULL;
1016
1017 }
1018
1019 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1020                              u64 total_bytes, u64 bytes_used,
1021                              struct btrfs_space_info **space_info)
1022 {
1023         struct btrfs_space_info *found;
1024
1025         found = __find_space_info(info, flags);
1026         if (found) {
1027                 found->total_bytes += total_bytes;
1028                 found->bytes_used += bytes_used;
1029                 found->full = 0;
1030                 WARN_ON(found->total_bytes < found->bytes_used);
1031                 *space_info = found;
1032                 return 0;
1033         }
1034         found = kmalloc(sizeof(*found), GFP_NOFS);
1035         if (!found)
1036                 return -ENOMEM;
1037
1038         list_add(&found->list, &info->space_info);
1039         found->flags = flags;
1040         found->total_bytes = total_bytes;
1041         found->bytes_used = bytes_used;
1042         found->bytes_pinned = 0;
1043         found->full = 0;
1044         *space_info = found;
1045         return 0;
1046 }
1047
1048 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1049 {
1050         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1051                                    BTRFS_BLOCK_GROUP_RAID1 |
1052                                    BTRFS_BLOCK_GROUP_RAID10 |
1053                                    BTRFS_BLOCK_GROUP_DUP);
1054         if (extra_flags) {
1055                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1056                         fs_info->avail_data_alloc_bits |= extra_flags;
1057                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1058                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1059                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1060                         fs_info->avail_system_alloc_bits |= extra_flags;
1061         }
1062 }
1063
1064 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1065 {
1066         u64 num_devices = root->fs_info->fs_devices->num_devices;
1067
1068         if (num_devices == 1)
1069                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1070         if (num_devices < 4)
1071                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1072
1073         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1074             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1075                       BTRFS_BLOCK_GROUP_RAID10))) {
1076                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1077         }
1078
1079         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1080             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1081                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1082         }
1083
1084         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1085             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1086              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1087              (flags & BTRFS_BLOCK_GROUP_DUP)))
1088                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1089         return flags;
1090 }
1091
1092 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1093                           struct btrfs_root *extent_root, u64 alloc_bytes,
1094                           u64 flags)
1095 {
1096         struct btrfs_space_info *space_info;
1097         u64 thresh;
1098         u64 start;
1099         u64 num_bytes;
1100         int ret;
1101
1102         flags = reduce_alloc_profile(extent_root, flags);
1103
1104         space_info = __find_space_info(extent_root->fs_info, flags);
1105         if (!space_info) {
1106                 ret = update_space_info(extent_root->fs_info, flags,
1107                                         0, 0, &space_info);
1108                 BUG_ON(ret);
1109         }
1110         BUG_ON(!space_info);
1111
1112         if (space_info->full)
1113                 return 0;
1114
1115         thresh = div_factor(space_info->total_bytes, 6);
1116         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1117             thresh)
1118                 return 0;
1119
1120         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1121         if (ret == -ENOSPC) {
1122 printk("space info full %Lu\n", flags);
1123                 space_info->full = 1;
1124                 return 0;
1125         }
1126
1127         BUG_ON(ret);
1128
1129         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1130                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1131         BUG_ON(ret);
1132
1133         return 0;
1134 }
1135
1136 static int update_block_group(struct btrfs_trans_handle *trans,
1137                               struct btrfs_root *root,
1138                               u64 bytenr, u64 num_bytes, int alloc,
1139                               int mark_free)
1140 {
1141         struct btrfs_block_group_cache *cache;
1142         struct btrfs_fs_info *info = root->fs_info;
1143         u64 total = num_bytes;
1144         u64 old_val;
1145         u64 byte_in_group;
1146         u64 start;
1147         u64 end;
1148
1149         while(total) {
1150                 cache = btrfs_lookup_block_group(info, bytenr);
1151                 if (!cache) {
1152                         return -1;
1153                 }
1154                 byte_in_group = bytenr - cache->key.objectid;
1155                 WARN_ON(byte_in_group > cache->key.offset);
1156                 start = cache->key.objectid;
1157                 end = start + cache->key.offset - 1;
1158                 set_extent_bits(&info->block_group_cache, start, end,
1159                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1160
1161                 old_val = btrfs_block_group_used(&cache->item);
1162                 num_bytes = min(total, cache->key.offset - byte_in_group);
1163                 if (alloc) {
1164                         old_val += num_bytes;
1165                         cache->space_info->bytes_used += num_bytes;
1166                 } else {
1167                         old_val -= num_bytes;
1168                         cache->space_info->bytes_used -= num_bytes;
1169                         if (mark_free) {
1170                                 set_extent_dirty(&info->free_space_cache,
1171                                                  bytenr, bytenr + num_bytes - 1,
1172                                                  GFP_NOFS);
1173                         }
1174                 }
1175                 btrfs_set_block_group_used(&cache->item, old_val);
1176                 total -= num_bytes;
1177                 bytenr += num_bytes;
1178         }
1179         return 0;
1180 }
1181
1182 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1183 {
1184         u64 start;
1185         u64 end;
1186         int ret;
1187         ret = find_first_extent_bit(&root->fs_info->block_group_cache,
1188                                     search_start, &start, &end,
1189                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
1190                                     BLOCK_GROUP_SYSTEM);
1191         if (ret)
1192                 return 0;
1193         return start;
1194 }
1195
1196
1197 static int update_pinned_extents(struct btrfs_root *root,
1198                                 u64 bytenr, u64 num, int pin)
1199 {
1200         u64 len;
1201         struct btrfs_block_group_cache *cache;
1202         struct btrfs_fs_info *fs_info = root->fs_info;
1203
1204         if (pin) {
1205                 set_extent_dirty(&fs_info->pinned_extents,
1206                                 bytenr, bytenr + num - 1, GFP_NOFS);
1207         } else {
1208                 clear_extent_dirty(&fs_info->pinned_extents,
1209                                 bytenr, bytenr + num - 1, GFP_NOFS);
1210         }
1211         while (num > 0) {
1212                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1213                 if (!cache) {
1214                         u64 first = first_logical_byte(root, bytenr);
1215                         WARN_ON(first < bytenr);
1216                         len = min(first - bytenr, num);
1217                 } else {
1218                         len = min(num, cache->key.offset -
1219                                   (bytenr - cache->key.objectid));
1220                 }
1221                 if (pin) {
1222                         if (cache) {
1223                                 cache->pinned += len;
1224                                 cache->space_info->bytes_pinned += len;
1225                         }
1226                         fs_info->total_pinned += len;
1227                 } else {
1228                         if (cache) {
1229                                 cache->pinned -= len;
1230                                 cache->space_info->bytes_pinned -= len;
1231                         }
1232                         fs_info->total_pinned -= len;
1233                 }
1234                 bytenr += len;
1235                 num -= len;
1236         }
1237         return 0;
1238 }
1239
1240 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1241 {
1242         u64 last = 0;
1243         u64 start;
1244         u64 end;
1245         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1246         int ret;
1247
1248         while(1) {
1249                 ret = find_first_extent_bit(pinned_extents, last,
1250                                             &start, &end, EXTENT_DIRTY);
1251                 if (ret)
1252                         break;
1253                 set_extent_dirty(copy, start, end, GFP_NOFS);
1254                 last = end + 1;
1255         }
1256         return 0;
1257 }
1258
1259 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1260                                struct btrfs_root *root,
1261                                struct extent_io_tree *unpin)
1262 {
1263         u64 start;
1264         u64 end;
1265         int ret;
1266         struct extent_io_tree *free_space_cache;
1267         free_space_cache = &root->fs_info->free_space_cache;
1268
1269         while(1) {
1270                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1271                                             EXTENT_DIRTY);
1272                 if (ret)
1273                         break;
1274                 update_pinned_extents(root, start, end + 1 - start, 0);
1275                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1276                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1277         }
1278         return 0;
1279 }
1280
1281 static int finish_current_insert(struct btrfs_trans_handle *trans,
1282                                  struct btrfs_root *extent_root)
1283 {
1284         u64 start;
1285         u64 end;
1286         struct btrfs_fs_info *info = extent_root->fs_info;
1287         struct extent_buffer *eb;
1288         struct btrfs_path *path;
1289         struct btrfs_key ins;
1290         struct btrfs_disk_key first;
1291         struct btrfs_extent_item extent_item;
1292         int ret;
1293         int level;
1294         int err = 0;
1295
1296         btrfs_set_stack_extent_refs(&extent_item, 1);
1297         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1298         path = btrfs_alloc_path();
1299
1300         while(1) {
1301                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1302                                             &end, EXTENT_LOCKED);
1303                 if (ret)
1304                         break;
1305
1306                 ins.objectid = start;
1307                 ins.offset = end + 1 - start;
1308                 err = btrfs_insert_item(trans, extent_root, &ins,
1309                                         &extent_item, sizeof(extent_item));
1310                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1311                                   GFP_NOFS);
1312                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1313                 level = btrfs_header_level(eb);
1314                 if (level == 0) {
1315                         btrfs_item_key(eb, &first, 0);
1316                 } else {
1317                         btrfs_node_key(eb, &first, 0);
1318                 }
1319                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1320                                           start, extent_root->root_key.objectid,
1321                                           0, level,
1322                                           btrfs_disk_key_objectid(&first));
1323                 BUG_ON(err);
1324                 free_extent_buffer(eb);
1325         }
1326         btrfs_free_path(path);
1327         return 0;
1328 }
1329
1330 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1331                           int pending)
1332 {
1333         int err = 0;
1334         struct extent_buffer *buf;
1335
1336         if (!pending) {
1337                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1338                 if (buf) {
1339                         if (btrfs_buffer_uptodate(buf)) {
1340                                 u64 transid =
1341                                     root->fs_info->running_transaction->transid;
1342                                 u64 header_transid =
1343                                         btrfs_header_generation(buf);
1344                                 if (header_transid == transid &&
1345                                     !btrfs_header_flag(buf,
1346                                                BTRFS_HEADER_FLAG_WRITTEN)) {
1347                                         clean_tree_block(NULL, root, buf);
1348                                         free_extent_buffer(buf);
1349                                         return 1;
1350                                 }
1351                         }
1352                         free_extent_buffer(buf);
1353                 }
1354                 update_pinned_extents(root, bytenr, num_bytes, 1);
1355         } else {
1356                 set_extent_bits(&root->fs_info->pending_del,
1357                                 bytenr, bytenr + num_bytes - 1,
1358                                 EXTENT_LOCKED, GFP_NOFS);
1359         }
1360         BUG_ON(err < 0);
1361         return 0;
1362 }
1363
1364 /*
1365  * remove an extent from the root, returns 0 on success
1366  */
1367 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1368                          *root, u64 bytenr, u64 num_bytes,
1369                          u64 root_objectid, u64 ref_generation,
1370                          u64 owner_objectid, u64 owner_offset, int pin,
1371                          int mark_free)
1372 {
1373         struct btrfs_path *path;
1374         struct btrfs_key key;
1375         struct btrfs_fs_info *info = root->fs_info;
1376         struct btrfs_root *extent_root = info->extent_root;
1377         struct extent_buffer *leaf;
1378         int ret;
1379         int extent_slot = 0;
1380         int found_extent = 0;
1381         int num_to_del = 1;
1382         struct btrfs_extent_item *ei;
1383         u32 refs;
1384
1385         key.objectid = bytenr;
1386         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1387         key.offset = num_bytes;
1388         path = btrfs_alloc_path();
1389         if (!path)
1390                 return -ENOMEM;
1391
1392         path->reada = 1;
1393         ret = lookup_extent_backref(trans, extent_root, path,
1394                                     bytenr, root_objectid,
1395                                     ref_generation,
1396                                     owner_objectid, owner_offset, 1);
1397         if (ret == 0) {
1398                 struct btrfs_key found_key;
1399                 extent_slot = path->slots[0];
1400                 while(extent_slot > 0) {
1401                         extent_slot--;
1402                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1403                                               extent_slot);
1404                         if (found_key.objectid != bytenr)
1405                                 break;
1406                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1407                             found_key.offset == num_bytes) {
1408                                 found_extent = 1;
1409                                 break;
1410                         }
1411                         if (path->slots[0] - extent_slot > 5)
1412                                 break;
1413                 }
1414                 if (!found_extent)
1415                         ret = btrfs_del_item(trans, extent_root, path);
1416         } else {
1417                 btrfs_print_leaf(extent_root, path->nodes[0]);
1418                 WARN_ON(1);
1419                 printk("Unable to find ref byte nr %Lu root %Lu "
1420                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1421                        root_objectid, ref_generation, owner_objectid,
1422                        owner_offset);
1423         }
1424         if (!found_extent) {
1425                 btrfs_release_path(extent_root, path);
1426                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1427                 if (ret < 0)
1428                         return ret;
1429                 BUG_ON(ret);
1430                 extent_slot = path->slots[0];
1431         }
1432
1433         leaf = path->nodes[0];
1434         ei = btrfs_item_ptr(leaf, extent_slot,
1435                             struct btrfs_extent_item);
1436         refs = btrfs_extent_refs(leaf, ei);
1437         BUG_ON(refs == 0);
1438         refs -= 1;
1439         btrfs_set_extent_refs(leaf, ei, refs);
1440
1441         btrfs_mark_buffer_dirty(leaf);
1442
1443         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1444                 /* if the back ref and the extent are next to each other
1445                  * they get deleted below in one shot
1446                  */
1447                 path->slots[0] = extent_slot;
1448                 num_to_del = 2;
1449         } else if (found_extent) {
1450                 /* otherwise delete the extent back ref */
1451                 ret = btrfs_del_item(trans, extent_root, path);
1452                 BUG_ON(ret);
1453                 /* if refs are 0, we need to setup the path for deletion */
1454                 if (refs == 0) {
1455                         btrfs_release_path(extent_root, path);
1456                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1457                                                 -1, 1);
1458                         if (ret < 0)
1459                                 return ret;
1460                         BUG_ON(ret);
1461                 }
1462         }
1463
1464         if (refs == 0) {
1465                 u64 super_used;
1466                 u64 root_used;
1467
1468                 if (pin) {
1469                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1470                         if (ret > 0)
1471                                 mark_free = 1;
1472                         BUG_ON(ret < 0);
1473                 }
1474
1475                 /* block accounting for super block */
1476                 super_used = btrfs_super_bytes_used(&info->super_copy);
1477                 btrfs_set_super_bytes_used(&info->super_copy,
1478                                            super_used - num_bytes);
1479
1480                 /* block accounting for root item */
1481                 root_used = btrfs_root_used(&root->root_item);
1482                 btrfs_set_root_used(&root->root_item,
1483                                            root_used - num_bytes);
1484                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1485                                       num_to_del);
1486                 if (ret) {
1487                         return ret;
1488                 }
1489                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1490                                          mark_free);
1491                 BUG_ON(ret);
1492         }
1493         btrfs_free_path(path);
1494         finish_current_insert(trans, extent_root);
1495         return ret;
1496 }
1497
1498 /*
1499  * find all the blocks marked as pending in the radix tree and remove
1500  * them from the extent map
1501  */
1502 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1503                                btrfs_root *extent_root)
1504 {
1505         int ret;
1506         int err = 0;
1507         u64 start;
1508         u64 end;
1509         struct extent_io_tree *pending_del;
1510         struct extent_io_tree *pinned_extents;
1511
1512         pending_del = &extent_root->fs_info->pending_del;
1513         pinned_extents = &extent_root->fs_info->pinned_extents;
1514
1515         while(1) {
1516                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1517                                             EXTENT_LOCKED);
1518                 if (ret)
1519                         break;
1520                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1521                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1522                                   GFP_NOFS);
1523                 ret = __free_extent(trans, extent_root,
1524                                      start, end + 1 - start,
1525                                      extent_root->root_key.objectid,
1526                                      0, 0, 0, 0, 0);
1527                 if (ret)
1528                         err = ret;
1529         }
1530         return err;
1531 }
1532
1533 /*
1534  * remove an extent from the root, returns 0 on success
1535  */
1536 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1537                       *root, u64 bytenr, u64 num_bytes,
1538                       u64 root_objectid, u64 ref_generation,
1539                       u64 owner_objectid, u64 owner_offset, int pin)
1540 {
1541         struct btrfs_root *extent_root = root->fs_info->extent_root;
1542         int pending_ret;
1543         int ret;
1544
1545         WARN_ON(num_bytes < root->sectorsize);
1546         if (!root->ref_cows)
1547                 ref_generation = 0;
1548
1549         if (root == extent_root) {
1550                 pin_down_bytes(root, bytenr, num_bytes, 1);
1551                 return 0;
1552         }
1553         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1554                             ref_generation, owner_objectid, owner_offset,
1555                             pin, pin == 0);
1556         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1557         return ret ? ret : pending_ret;
1558 }
1559
1560 static u64 stripe_align(struct btrfs_root *root, u64 val)
1561 {
1562         u64 mask = ((u64)root->stripesize - 1);
1563         u64 ret = (val + mask) & ~mask;
1564         return ret;
1565 }
1566
1567 /*
1568  * walks the btree of allocated extents and find a hole of a given size.
1569  * The key ins is changed to record the hole:
1570  * ins->objectid == block start
1571  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1572  * ins->offset == number of blocks
1573  * Any available blocks before search_start are skipped.
1574  */
1575 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1576                                      struct btrfs_root *orig_root,
1577                                      u64 num_bytes, u64 empty_size,
1578                                      u64 search_start, u64 search_end,
1579                                      u64 hint_byte, struct btrfs_key *ins,
1580                                      u64 exclude_start, u64 exclude_nr,
1581                                      int data)
1582 {
1583         int ret;
1584         u64 orig_search_start;
1585         struct btrfs_root * root = orig_root->fs_info->extent_root;
1586         struct btrfs_fs_info *info = root->fs_info;
1587         u64 total_needed = num_bytes;
1588         u64 *last_ptr = NULL;
1589         struct btrfs_block_group_cache *block_group;
1590         int full_scan = 0;
1591         int wrapped = 0;
1592         int empty_cluster = 2 * 1024 * 1024;
1593
1594         WARN_ON(num_bytes < root->sectorsize);
1595         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1596
1597         if (data & BTRFS_BLOCK_GROUP_METADATA) {
1598                 last_ptr = &root->fs_info->last_alloc;
1599                 empty_cluster = 256 * 1024;
1600         }
1601
1602         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1603                 last_ptr = &root->fs_info->last_data_alloc;
1604         }
1605
1606         if (last_ptr) {
1607                 if (*last_ptr)
1608                         hint_byte = *last_ptr;
1609                 else {
1610                         empty_size += empty_cluster;
1611                 }
1612         }
1613
1614         search_start = max(search_start, first_logical_byte(root, 0));
1615         orig_search_start = search_start;
1616
1617         if (search_end == (u64)-1)
1618                 search_end = btrfs_super_total_bytes(&info->super_copy);
1619
1620         if (hint_byte) {
1621                 block_group = btrfs_lookup_block_group(info, hint_byte);
1622                 if (!block_group)
1623                         hint_byte = search_start;
1624                 block_group = btrfs_find_block_group(root, block_group,
1625                                                      hint_byte, data, 1);
1626                 if (last_ptr && *last_ptr == 0 && block_group)
1627                         hint_byte = block_group->key.objectid;
1628         } else {
1629                 block_group = btrfs_find_block_group(root,
1630                                                      trans->block_group,
1631                                                      search_start, data, 1);
1632         }
1633         search_start = max(search_start, hint_byte);
1634
1635         total_needed += empty_size;
1636
1637 check_failed:
1638         if (!block_group) {
1639                 block_group = btrfs_lookup_block_group(info, search_start);
1640                 if (!block_group)
1641                         block_group = btrfs_lookup_block_group(info,
1642                                                        orig_search_start);
1643         }
1644         ret = find_search_start(root, &block_group, &search_start,
1645                                 total_needed, data);
1646         if (ret == -ENOSPC && last_ptr && *last_ptr) {
1647                 *last_ptr = 0;
1648                 block_group = btrfs_lookup_block_group(info,
1649                                                        orig_search_start);
1650                 search_start = orig_search_start;
1651                 ret = find_search_start(root, &block_group, &search_start,
1652                                         total_needed, data);
1653         }
1654         if (ret == -ENOSPC)
1655                 goto enospc;
1656         if (ret)
1657                 goto error;
1658
1659         if (last_ptr && *last_ptr && search_start != *last_ptr) {
1660                 *last_ptr = 0;
1661                 if (!empty_size) {
1662                         empty_size += empty_cluster;
1663                         total_needed += empty_size;
1664                 }
1665                 block_group = btrfs_lookup_block_group(info,
1666                                                        orig_search_start);
1667                 search_start = orig_search_start;
1668                 ret = find_search_start(root, &block_group,
1669                                         &search_start, total_needed, data);
1670                 if (ret == -ENOSPC)
1671                         goto enospc;
1672                 if (ret)
1673                         goto error;
1674         }
1675
1676         search_start = stripe_align(root, search_start);
1677         ins->objectid = search_start;
1678         ins->offset = num_bytes;
1679
1680         if (ins->objectid + num_bytes >= search_end)
1681                 goto enospc;
1682
1683         if (ins->objectid + num_bytes >
1684             block_group->key.objectid + block_group->key.offset) {
1685                 search_start = block_group->key.objectid +
1686                         block_group->key.offset;
1687                 goto new_group;
1688         }
1689
1690         if (test_range_bit(&info->extent_ins, ins->objectid,
1691                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1692                 search_start = ins->objectid + num_bytes;
1693                 goto new_group;
1694         }
1695
1696         if (test_range_bit(&info->pinned_extents, ins->objectid,
1697                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1698                 search_start = ins->objectid + num_bytes;
1699                 goto new_group;
1700         }
1701
1702         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1703             ins->objectid < exclude_start + exclude_nr)) {
1704                 search_start = exclude_start + exclude_nr;
1705                 goto new_group;
1706         }
1707
1708         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1709                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1710                 if (block_group)
1711                         trans->block_group = block_group;
1712         }
1713         ins->offset = num_bytes;
1714         if (last_ptr) {
1715                 *last_ptr = ins->objectid + ins->offset;
1716                 if (*last_ptr ==
1717                     btrfs_super_total_bytes(&root->fs_info->super_copy)) {
1718                         *last_ptr = 0;
1719                 }
1720         }
1721         return 0;
1722
1723 new_group:
1724         if (search_start + num_bytes >= search_end) {
1725 enospc:
1726                 search_start = orig_search_start;
1727                 if (full_scan) {
1728                         ret = -ENOSPC;
1729                         goto error;
1730                 }
1731                 if (wrapped) {
1732                         if (!full_scan)
1733                                 total_needed -= empty_size;
1734                         full_scan = 1;
1735                 } else
1736                         wrapped = 1;
1737         }
1738         block_group = btrfs_lookup_block_group(info, search_start);
1739         cond_resched();
1740         block_group = btrfs_find_block_group(root, block_group,
1741                                              search_start, data, 0);
1742         goto check_failed;
1743
1744 error:
1745         return ret;
1746 }
1747
1748 /*
1749  * finds a free extent and does all the dirty work required for allocation
1750  * returns the key for the extent through ins, and a tree buffer for
1751  * the first block of the extent through buf.
1752  *
1753  * returns 0 if everything worked, non-zero otherwise.
1754  */
1755 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1756                        struct btrfs_root *root,
1757                        u64 num_bytes, u64 min_alloc_size,
1758                        u64 root_objectid, u64 ref_generation,
1759                        u64 owner, u64 owner_offset,
1760                        u64 empty_size, u64 hint_byte,
1761                        u64 search_end, struct btrfs_key *ins, u64 data)
1762 {
1763         int ret;
1764         int pending_ret;
1765         u64 super_used;
1766         u64 root_used;
1767         u64 search_start = 0;
1768         u64 alloc_profile;
1769         u32 sizes[2];
1770         struct btrfs_fs_info *info = root->fs_info;
1771         struct btrfs_root *extent_root = info->extent_root;
1772         struct btrfs_extent_item *extent_item;
1773         struct btrfs_extent_ref *ref;
1774         struct btrfs_path *path;
1775         struct btrfs_key keys[2];
1776
1777         if (data) {
1778                 alloc_profile = info->avail_data_alloc_bits &
1779                                 info->data_alloc_profile;
1780                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1781         } else if (root == root->fs_info->chunk_root) {
1782                 alloc_profile = info->avail_system_alloc_bits &
1783                                 info->system_alloc_profile;
1784                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1785         } else {
1786                 alloc_profile = info->avail_metadata_alloc_bits &
1787                                 info->metadata_alloc_profile;
1788                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1789         }
1790 again:
1791         data = reduce_alloc_profile(root, data);
1792         if (root->ref_cows) {
1793                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1794                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1795                                              2 * 1024 * 1024,
1796                                              BTRFS_BLOCK_GROUP_METADATA |
1797                                              (info->metadata_alloc_profile &
1798                                               info->avail_metadata_alloc_bits));
1799                         BUG_ON(ret);
1800                 }
1801                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1802                                      num_bytes + 2 * 1024 * 1024, data);
1803                 BUG_ON(ret);
1804         }
1805
1806         WARN_ON(num_bytes < root->sectorsize);
1807         ret = find_free_extent(trans, root, num_bytes, empty_size,
1808                                search_start, search_end, hint_byte, ins,
1809                                trans->alloc_exclude_start,
1810                                trans->alloc_exclude_nr, data);
1811
1812         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
1813                 num_bytes = num_bytes >> 1;
1814                 num_bytes = max(num_bytes, min_alloc_size);
1815                 goto again;
1816         }
1817         if (ret) {
1818                 printk("allocation failed flags %Lu\n", data);
1819         }
1820         BUG_ON(ret);
1821         if (ret)
1822                 return ret;
1823
1824         /* block accounting for super block */
1825         super_used = btrfs_super_bytes_used(&info->super_copy);
1826         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1827
1828         /* block accounting for root item */
1829         root_used = btrfs_root_used(&root->root_item);
1830         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1831
1832         clear_extent_dirty(&root->fs_info->free_space_cache,
1833                            ins->objectid, ins->objectid + ins->offset - 1,
1834                            GFP_NOFS);
1835
1836         if (root == extent_root) {
1837                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1838                                 ins->objectid + ins->offset - 1,
1839                                 EXTENT_LOCKED, GFP_NOFS);
1840                 goto update_block;
1841         }
1842
1843         WARN_ON(trans->alloc_exclude_nr);
1844         trans->alloc_exclude_start = ins->objectid;
1845         trans->alloc_exclude_nr = ins->offset;
1846
1847         memcpy(&keys[0], ins, sizeof(*ins));
1848         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1849                                          owner, owner_offset);
1850         keys[1].objectid = ins->objectid;
1851         keys[1].type = BTRFS_EXTENT_REF_KEY;
1852         sizes[0] = sizeof(*extent_item);
1853         sizes[1] = sizeof(*ref);
1854
1855         path = btrfs_alloc_path();
1856         BUG_ON(!path);
1857
1858         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1859                                        sizes, 2);
1860
1861         BUG_ON(ret);
1862         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1863                                      struct btrfs_extent_item);
1864         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1865         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1866                              struct btrfs_extent_ref);
1867
1868         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1869         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1870         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1871         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1872
1873         btrfs_mark_buffer_dirty(path->nodes[0]);
1874
1875         trans->alloc_exclude_start = 0;
1876         trans->alloc_exclude_nr = 0;
1877         btrfs_free_path(path);
1878         finish_current_insert(trans, extent_root);
1879         pending_ret = del_pending_extents(trans, extent_root);
1880
1881         if (ret) {
1882                 return ret;
1883         }
1884         if (pending_ret) {
1885                 return pending_ret;
1886         }
1887
1888 update_block:
1889         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1890         if (ret) {
1891                 printk("update block group failed for %Lu %Lu\n",
1892                        ins->objectid, ins->offset);
1893                 BUG();
1894         }
1895         return 0;
1896 }
1897
1898 /*
1899  * helper function to allocate a block for a given tree
1900  * returns the tree buffer or NULL.
1901  */
1902 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1903                                              struct btrfs_root *root,
1904                                              u32 blocksize,
1905                                              u64 root_objectid, u64 hint,
1906                                              u64 empty_size)
1907 {
1908         u64 ref_generation;
1909
1910         if (root->ref_cows)
1911                 ref_generation = trans->transid;
1912         else
1913                 ref_generation = 0;
1914
1915
1916         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1917                                         ref_generation, 0, 0, hint, empty_size);
1918 }
1919
1920 /*
1921  * helper function to allocate a block for a given tree
1922  * returns the tree buffer or NULL.
1923  */
1924 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1925                                              struct btrfs_root *root,
1926                                              u32 blocksize,
1927                                              u64 root_objectid,
1928                                              u64 ref_generation,
1929                                              u64 first_objectid,
1930                                              int level,
1931                                              u64 hint,
1932                                              u64 empty_size)
1933 {
1934         struct btrfs_key ins;
1935         int ret;
1936         struct extent_buffer *buf;
1937
1938         ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
1939                                  root_objectid, ref_generation,
1940                                  level, first_objectid, empty_size, hint,
1941                                  (u64)-1, &ins, 0);
1942         if (ret) {
1943                 BUG_ON(ret > 0);
1944                 return ERR_PTR(ret);
1945         }
1946         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1947         if (!buf) {
1948                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1949                                   root->root_key.objectid, ref_generation,
1950                                   0, 0, 0);
1951                 return ERR_PTR(-ENOMEM);
1952         }
1953         btrfs_set_header_generation(buf, trans->transid);
1954         clean_tree_block(trans, root, buf);
1955         btrfs_set_buffer_uptodate(buf);
1956
1957         if (PageDirty(buf->first_page)) {
1958                 printk("page %lu dirty\n", buf->first_page->index);
1959                 WARN_ON(1);
1960         }
1961
1962         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1963                          buf->start + buf->len - 1, GFP_NOFS);
1964         if (!btrfs_test_opt(root, SSD))
1965                 btrfs_set_buffer_defrag(buf);
1966         trans->blocks_used++;
1967         return buf;
1968 }
1969
1970 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1971                                   struct btrfs_root *root,
1972                                   struct extent_buffer *leaf)
1973 {
1974         u64 leaf_owner;
1975         u64 leaf_generation;
1976         struct btrfs_key key;
1977         struct btrfs_file_extent_item *fi;
1978         int i;
1979         int nritems;
1980         int ret;
1981
1982         BUG_ON(!btrfs_is_leaf(leaf));
1983         nritems = btrfs_header_nritems(leaf);
1984         leaf_owner = btrfs_header_owner(leaf);
1985         leaf_generation = btrfs_header_generation(leaf);
1986
1987         for (i = 0; i < nritems; i++) {
1988                 u64 disk_bytenr;
1989
1990                 btrfs_item_key_to_cpu(leaf, &key, i);
1991                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1992                         continue;
1993                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1994                 if (btrfs_file_extent_type(leaf, fi) ==
1995                     BTRFS_FILE_EXTENT_INLINE)
1996                         continue;
1997                 /*
1998                  * FIXME make sure to insert a trans record that
1999                  * repeats the snapshot del on crash
2000                  */
2001                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2002                 if (disk_bytenr == 0)
2003                         continue;
2004                 ret = btrfs_free_extent(trans, root, disk_bytenr,
2005                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2006                                 leaf_owner, leaf_generation,
2007                                 key.objectid, key.offset, 0);
2008                 BUG_ON(ret);
2009         }
2010         return 0;
2011 }
2012
2013 static void noinline reada_walk_down(struct btrfs_root *root,
2014                                      struct extent_buffer *node,
2015                                      int slot)
2016 {
2017         u64 bytenr;
2018         u64 last = 0;
2019         u32 nritems;
2020         u32 refs;
2021         u32 blocksize;
2022         int ret;
2023         int i;
2024         int level;
2025         int skipped = 0;
2026
2027         nritems = btrfs_header_nritems(node);
2028         level = btrfs_header_level(node);
2029         if (level)
2030                 return;
2031
2032         for (i = slot; i < nritems && skipped < 32; i++) {
2033                 bytenr = btrfs_node_blockptr(node, i);
2034                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
2035                              (last > bytenr && last - bytenr > 32 * 1024))) {
2036                         skipped++;
2037                         continue;
2038                 }
2039                 blocksize = btrfs_level_size(root, level - 1);
2040                 if (i != slot) {
2041                         ret = lookup_extent_ref(NULL, root, bytenr,
2042                                                 blocksize, &refs);
2043                         BUG_ON(ret);
2044                         if (refs != 1) {
2045                                 skipped++;
2046                                 continue;
2047                         }
2048                 }
2049                 mutex_unlock(&root->fs_info->fs_mutex);
2050                 ret = readahead_tree_block(root, bytenr, blocksize);
2051                 last = bytenr + blocksize;
2052                 cond_resched();
2053                 mutex_lock(&root->fs_info->fs_mutex);
2054                 if (ret)
2055                         break;
2056         }
2057 }
2058
2059 /*
2060  * helper function for drop_snapshot, this walks down the tree dropping ref
2061  * counts as it goes.
2062  */
2063 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2064                                    struct btrfs_root *root,
2065                                    struct btrfs_path *path, int *level)
2066 {
2067         u64 root_owner;
2068         u64 root_gen;
2069         u64 bytenr;
2070         struct extent_buffer *next;
2071         struct extent_buffer *cur;
2072         struct extent_buffer *parent;
2073         u32 blocksize;
2074         int ret;
2075         u32 refs;
2076
2077         WARN_ON(*level < 0);
2078         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2079         ret = lookup_extent_ref(trans, root,
2080                                 path->nodes[*level]->start,
2081                                 path->nodes[*level]->len, &refs);
2082         BUG_ON(ret);
2083         if (refs > 1)
2084                 goto out;
2085
2086         /*
2087          * walk down to the last node level and free all the leaves
2088          */
2089         while(*level >= 0) {
2090                 WARN_ON(*level < 0);
2091                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2092                 cur = path->nodes[*level];
2093
2094                 if (btrfs_header_level(cur) != *level)
2095                         WARN_ON(1);
2096
2097                 if (path->slots[*level] >=
2098                     btrfs_header_nritems(cur))
2099                         break;
2100                 if (*level == 0) {
2101                         ret = drop_leaf_ref(trans, root, cur);
2102                         BUG_ON(ret);
2103                         break;
2104                 }
2105                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2106                 blocksize = btrfs_level_size(root, *level - 1);
2107                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
2108                 BUG_ON(ret);
2109                 if (refs != 1) {
2110                         parent = path->nodes[*level];
2111                         root_owner = btrfs_header_owner(parent);
2112                         root_gen = btrfs_header_generation(parent);
2113                         path->slots[*level]++;
2114                         ret = btrfs_free_extent(trans, root, bytenr,
2115                                                 blocksize, root_owner,
2116                                                 root_gen, 0, 0, 1);
2117                         BUG_ON(ret);
2118                         continue;
2119                 }
2120                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2121                 if (!next || !btrfs_buffer_uptodate(next)) {
2122                         free_extent_buffer(next);
2123                         reada_walk_down(root, cur, path->slots[*level]);
2124
2125                         mutex_unlock(&root->fs_info->fs_mutex);
2126                         next = read_tree_block(root, bytenr, blocksize);
2127                         mutex_lock(&root->fs_info->fs_mutex);
2128
2129                         /* we've dropped the lock, double check */
2130                         ret = lookup_extent_ref(trans, root, bytenr,
2131                                                 blocksize, &refs);
2132                         BUG_ON(ret);
2133                         if (refs != 1) {
2134                                 parent = path->nodes[*level];
2135                                 root_owner = btrfs_header_owner(parent);
2136                                 root_gen = btrfs_header_generation(parent);
2137
2138                                 path->slots[*level]++;
2139                                 free_extent_buffer(next);
2140                                 ret = btrfs_free_extent(trans, root, bytenr,
2141                                                         blocksize,
2142                                                         root_owner,
2143                                                         root_gen, 0, 0, 1);
2144                                 BUG_ON(ret);
2145                                 continue;
2146                         }
2147                 } else if (next) {
2148                         btrfs_verify_block_csum(root, next);
2149                 }
2150                 WARN_ON(*level <= 0);
2151                 if (path->nodes[*level-1])
2152                         free_extent_buffer(path->nodes[*level-1]);
2153                 path->nodes[*level-1] = next;
2154                 *level = btrfs_header_level(next);
2155                 path->slots[*level] = 0;
2156         }
2157 out:
2158         WARN_ON(*level < 0);
2159         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2160
2161         if (path->nodes[*level] == root->node) {
2162                 root_owner = root->root_key.objectid;
2163                 parent = path->nodes[*level];
2164         } else {
2165                 parent = path->nodes[*level + 1];
2166                 root_owner = btrfs_header_owner(parent);
2167         }
2168
2169         root_gen = btrfs_header_generation(parent);
2170         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
2171                                 path->nodes[*level]->len,
2172                                 root_owner, root_gen, 0, 0, 1);
2173         free_extent_buffer(path->nodes[*level]);
2174         path->nodes[*level] = NULL;
2175         *level += 1;
2176         BUG_ON(ret);
2177         return 0;
2178 }
2179
2180 /*
2181  * helper for dropping snapshots.  This walks back up the tree in the path
2182  * to find the first node higher up where we haven't yet gone through
2183  * all the slots
2184  */
2185 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2186                                  struct btrfs_root *root,
2187                                  struct btrfs_path *path, int *level)
2188 {
2189         u64 root_owner;
2190         u64 root_gen;
2191         struct btrfs_root_item *root_item = &root->root_item;
2192         int i;
2193         int slot;
2194         int ret;
2195
2196         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2197                 slot = path->slots[i];
2198                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2199                         struct extent_buffer *node;
2200                         struct btrfs_disk_key disk_key;
2201                         node = path->nodes[i];
2202                         path->slots[i]++;
2203                         *level = i;
2204                         WARN_ON(*level == 0);
2205                         btrfs_node_key(node, &disk_key, path->slots[i]);
2206                         memcpy(&root_item->drop_progress,
2207                                &disk_key, sizeof(disk_key));
2208                         root_item->drop_level = i;
2209                         return 0;
2210                 } else {
2211                         if (path->nodes[*level] == root->node) {
2212                                 root_owner = root->root_key.objectid;
2213                                 root_gen =
2214                                    btrfs_header_generation(path->nodes[*level]);
2215                         } else {
2216                                 struct extent_buffer *node;
2217                                 node = path->nodes[*level + 1];
2218                                 root_owner = btrfs_header_owner(node);
2219                                 root_gen = btrfs_header_generation(node);
2220                         }
2221                         ret = btrfs_free_extent(trans, root,
2222                                                 path->nodes[*level]->start,
2223                                                 path->nodes[*level]->len,
2224                                                 root_owner, root_gen, 0, 0, 1);
2225                         BUG_ON(ret);
2226                         free_extent_buffer(path->nodes[*level]);
2227                         path->nodes[*level] = NULL;
2228                         *level = i + 1;
2229                 }
2230         }
2231         return 1;
2232 }
2233
2234 /*
2235  * drop the reference count on the tree rooted at 'snap'.  This traverses
2236  * the tree freeing any blocks that have a ref count of zero after being
2237  * decremented.
2238  */
2239 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2240                         *root)
2241 {
2242         int ret = 0;
2243         int wret;
2244         int level;
2245         struct btrfs_path *path;
2246         int i;
2247         int orig_level;
2248         struct btrfs_root_item *root_item = &root->root_item;
2249
2250         path = btrfs_alloc_path();
2251         BUG_ON(!path);
2252
2253         level = btrfs_header_level(root->node);
2254         orig_level = level;
2255         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2256                 path->nodes[level] = root->node;
2257                 extent_buffer_get(root->node);
2258                 path->slots[level] = 0;
2259         } else {
2260                 struct btrfs_key key;
2261                 struct btrfs_disk_key found_key;
2262                 struct extent_buffer *node;
2263
2264                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2265                 level = root_item->drop_level;
2266                 path->lowest_level = level;
2267                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2268                 if (wret < 0) {
2269                         ret = wret;
2270                         goto out;
2271                 }
2272                 node = path->nodes[level];
2273                 btrfs_node_key(node, &found_key, path->slots[level]);
2274                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2275                                sizeof(found_key)));
2276         }
2277         while(1) {
2278                 wret = walk_down_tree(trans, root, path, &level);
2279                 if (wret > 0)
2280                         break;
2281                 if (wret < 0)
2282                         ret = wret;
2283
2284                 wret = walk_up_tree(trans, root, path, &level);
2285                 if (wret > 0)
2286                         break;
2287                 if (wret < 0)
2288                         ret = wret;
2289                 ret = -EAGAIN;
2290                 break;
2291         }
2292         for (i = 0; i <= orig_level; i++) {
2293                 if (path->nodes[i]) {
2294                         free_extent_buffer(path->nodes[i]);
2295                         path->nodes[i] = NULL;
2296                 }
2297         }
2298 out:
2299         btrfs_free_path(path);
2300         return ret;
2301 }
2302
2303 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2304 {
2305         u64 start;
2306         u64 end;
2307         u64 ptr;
2308         int ret;
2309         while(1) {
2310                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2311                                             &start, &end, (unsigned int)-1);
2312                 if (ret)
2313                         break;
2314                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2315                 if (!ret)
2316                         kfree((void *)(unsigned long)ptr);
2317                 clear_extent_bits(&info->block_group_cache, start,
2318                                   end, (unsigned int)-1, GFP_NOFS);
2319         }
2320         while(1) {
2321                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2322                                             &start, &end, EXTENT_DIRTY);
2323                 if (ret)
2324                         break;
2325                 clear_extent_dirty(&info->free_space_cache, start,
2326                                    end, GFP_NOFS);
2327         }
2328         return 0;
2329 }
2330
2331 static unsigned long calc_ra(unsigned long start, unsigned long last,
2332                              unsigned long nr)
2333 {
2334         return min(last, start + nr - 1);
2335 }
2336
2337 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2338                                          u64 len)
2339 {
2340         u64 page_start;
2341         u64 page_end;
2342         unsigned long last_index;
2343         unsigned long i;
2344         struct page *page;
2345         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2346         struct file_ra_state *ra;
2347         unsigned long total_read = 0;
2348         unsigned long ra_pages;
2349         struct btrfs_trans_handle *trans;
2350
2351         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2352
2353         mutex_lock(&inode->i_mutex);
2354         i = start >> PAGE_CACHE_SHIFT;
2355         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2356
2357         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2358
2359         file_ra_state_init(ra, inode->i_mapping);
2360
2361         for (; i <= last_index; i++) {
2362                 if (total_read % ra_pages == 0) {
2363                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2364                                        calc_ra(i, last_index, ra_pages));
2365                 }
2366                 total_read++;
2367                 if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size)
2368                         goto truncate_racing;
2369
2370                 page = grab_cache_page(inode->i_mapping, i);
2371                 if (!page) {
2372                         goto out_unlock;
2373                 }
2374                 if (!PageUptodate(page)) {
2375                         btrfs_readpage(NULL, page);
2376                         lock_page(page);
2377                         if (!PageUptodate(page)) {
2378                                 unlock_page(page);
2379                                 page_cache_release(page);
2380                                 goto out_unlock;
2381                         }
2382                 }
2383 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2384                 ClearPageDirty(page);
2385 #else
2386                 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2387 #endif
2388                 wait_on_page_writeback(page);
2389                 set_page_extent_mapped(page);
2390                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2391                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2392
2393                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2394
2395                 set_extent_delalloc(io_tree, page_start,
2396                                     page_end, GFP_NOFS);
2397                 set_page_dirty(page);
2398
2399                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2400                 unlock_page(page);
2401                 page_cache_release(page);
2402         }
2403         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2404                                            total_read);
2405
2406 out_unlock:
2407         kfree(ra);
2408         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2409         if (trans) {
2410                 btrfs_add_ordered_inode(inode);
2411                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2412                 mark_inode_dirty(inode);
2413         }
2414         mutex_unlock(&inode->i_mutex);
2415         return 0;
2416
2417 truncate_racing:
2418         vmtruncate(inode, inode->i_size);
2419         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2420                                            total_read);
2421         goto out_unlock;
2422 }
2423
2424 /*
2425  * The back references tell us which tree holds a ref on a block,
2426  * but it is possible for the tree root field in the reference to
2427  * reflect the original root before a snapshot was made.  In this
2428  * case we should search through all the children of a given root
2429  * to find potential holders of references on a block.
2430  *
2431  * Instead, we do something a little less fancy and just search
2432  * all the roots for a given key/block combination.
2433  */
2434 static int find_root_for_ref(struct btrfs_root *root,
2435                              struct btrfs_path *path,
2436                              struct btrfs_key *key0,
2437                              int level,
2438                              int file_key,
2439                              struct btrfs_root **found_root,
2440                              u64 bytenr)
2441 {
2442         struct btrfs_key root_location;
2443         struct btrfs_root *cur_root = *found_root;
2444         struct btrfs_file_extent_item *file_extent;
2445         u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
2446         u64 found_bytenr;
2447         int ret;
2448         int i;
2449
2450         root_location.offset = (u64)-1;
2451         root_location.type = BTRFS_ROOT_ITEM_KEY;
2452         path->lowest_level = level;
2453         path->reada = 0;
2454         while(1) {
2455                 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
2456                 found_bytenr = 0;
2457                 if (ret == 0 && file_key) {
2458                         struct extent_buffer *leaf = path->nodes[0];
2459                         file_extent = btrfs_item_ptr(leaf, path->slots[0],
2460                                              struct btrfs_file_extent_item);
2461                         if (btrfs_file_extent_type(leaf, file_extent) ==
2462                             BTRFS_FILE_EXTENT_REG) {
2463                                 found_bytenr =
2464                                         btrfs_file_extent_disk_bytenr(leaf,
2465                                                                file_extent);
2466                        }
2467                 } else if (ret == 0) {
2468                         if (path->nodes[level])
2469                                 found_bytenr = path->nodes[level]->start;
2470                 }
2471
2472                 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2473                         if (!path->nodes[i])
2474                                 break;
2475                         free_extent_buffer(path->nodes[i]);
2476                         path->nodes[i] = NULL;
2477                 }
2478                 btrfs_release_path(cur_root, path);
2479
2480                 if (found_bytenr == bytenr) {
2481                         *found_root = cur_root;
2482                         ret = 0;
2483                         goto out;
2484                 }
2485                 ret = btrfs_search_root(root->fs_info->tree_root,
2486                                         root_search_start, &root_search_start);
2487                 if (ret)
2488                         break;
2489
2490                 root_location.objectid = root_search_start;
2491                 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
2492                                                       &root_location);
2493                 if (!cur_root) {
2494                         ret = 1;
2495                         break;
2496                 }
2497         }
2498 out:
2499         path->lowest_level = 0;
2500         return ret;
2501 }
2502
2503 /*
2504  * note, this releases the path
2505  */
2506 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2507                                   struct btrfs_path *path,
2508                                   struct btrfs_key *extent_key)
2509 {
2510         struct inode *inode;
2511         struct btrfs_root *found_root;
2512         struct btrfs_key root_location;
2513         struct btrfs_key found_key;
2514         struct btrfs_extent_ref *ref;
2515         u64 ref_root;
2516         u64 ref_gen;
2517         u64 ref_objectid;
2518         u64 ref_offset;
2519         int ret;
2520         int level;
2521
2522         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2523                              struct btrfs_extent_ref);
2524         ref_root = btrfs_ref_root(path->nodes[0], ref);
2525         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2526         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2527         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2528         btrfs_release_path(extent_root, path);
2529
2530         root_location.objectid = ref_root;
2531         if (ref_gen == 0)
2532                 root_location.offset = 0;
2533         else
2534                 root_location.offset = (u64)-1;
2535         root_location.type = BTRFS_ROOT_ITEM_KEY;
2536
2537         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2538                                                 &root_location);
2539         BUG_ON(!found_root);
2540
2541         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2542                 found_key.objectid = ref_objectid;
2543                 found_key.type = BTRFS_EXTENT_DATA_KEY;
2544                 found_key.offset = ref_offset;
2545                 level = 0;
2546
2547                 ret = find_root_for_ref(extent_root, path, &found_key,
2548                                         level, 1, &found_root,
2549                                         extent_key->objectid);
2550
2551                 if (ret)
2552                         goto out;
2553
2554                 mutex_unlock(&extent_root->fs_info->fs_mutex);
2555                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2556                                           ref_objectid, found_root);
2557                 if (inode->i_state & I_NEW) {
2558                         /* the inode and parent dir are two different roots */
2559                         BTRFS_I(inode)->root = found_root;
2560                         BTRFS_I(inode)->location.objectid = ref_objectid;
2561                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2562                         BTRFS_I(inode)->location.offset = 0;
2563                         btrfs_read_locked_inode(inode);
2564                         unlock_new_inode(inode);
2565
2566                 }
2567                 /* this can happen if the reference is not against
2568                  * the latest version of the tree root
2569                  */
2570                 if (is_bad_inode(inode)) {
2571                         mutex_lock(&extent_root->fs_info->fs_mutex);
2572                         goto out;
2573                 }
2574                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2575                 iput(inode);
2576                 mutex_lock(&extent_root->fs_info->fs_mutex);
2577         } else {
2578                 struct btrfs_trans_handle *trans;
2579                 struct extent_buffer *eb;
2580                 int i;
2581
2582                 eb = read_tree_block(found_root, extent_key->objectid,
2583                                      extent_key->offset);
2584                 level = btrfs_header_level(eb);
2585
2586                 if (level == 0)
2587                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2588                 else
2589                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2590
2591                 free_extent_buffer(eb);
2592
2593                 ret = find_root_for_ref(extent_root, path, &found_key,
2594                                         level, 0, &found_root,
2595                                         extent_key->objectid);
2596
2597                 if (ret)
2598                         goto out;
2599
2600                 trans = btrfs_start_transaction(found_root, 1);
2601
2602                 path->lowest_level = level;
2603                 path->reada = 2;
2604                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2605                                         0, 1);
2606                 path->lowest_level = 0;
2607                 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2608                         if (!path->nodes[i])
2609                                 break;
2610                         free_extent_buffer(path->nodes[i]);
2611                         path->nodes[i] = NULL;
2612                 }
2613                 btrfs_release_path(found_root, path);
2614                 btrfs_end_transaction(trans, found_root);
2615         }
2616
2617 out:
2618         return 0;
2619 }
2620
2621 static int noinline del_extent_zero(struct btrfs_root *extent_root,
2622                                     struct btrfs_path *path,
2623                                     struct btrfs_key *extent_key)
2624 {
2625         int ret;
2626         struct btrfs_trans_handle *trans;
2627
2628         trans = btrfs_start_transaction(extent_root, 1);
2629         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
2630         if (ret > 0) {
2631                 ret = -EIO;
2632                 goto out;
2633         }
2634         if (ret < 0)
2635                 goto out;
2636         ret = btrfs_del_item(trans, extent_root, path);
2637 out:
2638         btrfs_end_transaction(trans, extent_root);
2639         return ret;
2640 }
2641
2642 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2643                                         struct btrfs_path *path,
2644                                         struct btrfs_key *extent_key)
2645 {
2646         struct btrfs_key key;
2647         struct btrfs_key found_key;
2648         struct extent_buffer *leaf;
2649         u32 nritems;
2650         u32 item_size;
2651         int ret = 0;
2652
2653         if (extent_key->objectid == 0) {
2654                 ret = del_extent_zero(extent_root, path, extent_key);
2655                 goto out;
2656         }
2657         key.objectid = extent_key->objectid;
2658         key.type = BTRFS_EXTENT_REF_KEY;
2659         key.offset = 0;
2660
2661         while(1) {
2662                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2663
2664                 if (ret < 0)
2665                         goto out;
2666
2667                 ret = 0;
2668                 leaf = path->nodes[0];
2669                 nritems = btrfs_header_nritems(leaf);
2670                 if (path->slots[0] == nritems) {
2671                         ret = btrfs_next_leaf(extent_root, path);
2672                         if (ret > 0) {
2673                                 ret = 0;
2674                                 goto out;
2675                         }
2676                         if (ret < 0)
2677                                 goto out;
2678                         leaf = path->nodes[0];
2679                 }
2680
2681                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2682                 if (found_key.objectid != extent_key->objectid) {
2683                         break;
2684                 }
2685
2686                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
2687                         break;
2688                 }
2689
2690                 key.offset = found_key.offset + 1;
2691                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2692
2693                 ret = relocate_one_reference(extent_root, path, extent_key);
2694                 if (ret)
2695                         goto out;
2696         }
2697         ret = 0;
2698 out:
2699         btrfs_release_path(extent_root, path);
2700         return ret;
2701 }
2702
2703 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
2704 {
2705         u64 num_devices;
2706         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
2707                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
2708
2709         num_devices = root->fs_info->fs_devices->num_devices;
2710         if (num_devices == 1) {
2711                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2712                 stripped = flags & ~stripped;
2713
2714                 /* turn raid0 into single device chunks */
2715                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
2716                         return stripped;
2717
2718                 /* turn mirroring into duplication */
2719                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
2720                              BTRFS_BLOCK_GROUP_RAID10))
2721                         return stripped | BTRFS_BLOCK_GROUP_DUP;
2722                 return flags;
2723         } else {
2724                 /* they already had raid on here, just return */
2725                 if (flags & stripped)
2726                         return flags;
2727
2728                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2729                 stripped = flags & ~stripped;
2730
2731                 /* switch duplicated blocks with raid1 */
2732                 if (flags & BTRFS_BLOCK_GROUP_DUP)
2733                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
2734
2735                 /* turn single device chunks into raid0 */
2736                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
2737         }
2738         return flags;
2739 }
2740
2741 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
2742 {
2743         struct btrfs_trans_handle *trans;
2744         struct btrfs_root *tree_root = root->fs_info->tree_root;
2745         struct btrfs_path *path;
2746         u64 cur_byte;
2747         u64 total_found;
2748         u64 shrink_last_byte;
2749         u64 new_alloc_flags;
2750         struct btrfs_block_group_cache *shrink_block_group;
2751         struct btrfs_fs_info *info = root->fs_info;
2752         struct btrfs_key key;
2753         struct btrfs_key found_key;
2754         struct extent_buffer *leaf;
2755         u32 nritems;
2756         int ret;
2757         int progress;
2758
2759         shrink_block_group = btrfs_lookup_block_group(root->fs_info,
2760                                                       shrink_start);
2761         BUG_ON(!shrink_block_group);
2762
2763         shrink_last_byte = shrink_start + shrink_block_group->key.offset;
2764
2765         shrink_block_group->space_info->total_bytes -=
2766                 shrink_block_group->key.offset;
2767         path = btrfs_alloc_path();
2768         root = root->fs_info->extent_root;
2769         path->reada = 2;
2770
2771 again:
2772         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
2773                 trans = btrfs_start_transaction(root, 1);
2774                 new_alloc_flags = update_block_group_flags(root,
2775                                                    shrink_block_group->flags);
2776                 do_chunk_alloc(trans, root->fs_info->extent_root,
2777                         btrfs_block_group_used(&shrink_block_group->item) +
2778                         2 * 1024 * 1024, new_alloc_flags);
2779                 btrfs_end_transaction(trans, root);
2780         }
2781         shrink_block_group->ro = 1;
2782
2783         total_found = 0;
2784         progress = 0;
2785         key.objectid = shrink_start;
2786         key.offset = 0;
2787         key.type = 0;
2788         cur_byte = key.objectid;
2789
2790         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2791         if (ret < 0)
2792                 goto out;
2793
2794         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
2795         if (ret < 0)
2796                 goto out;
2797
2798         if (ret == 0) {
2799                 leaf = path->nodes[0];
2800                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2801                 if (found_key.objectid + found_key.offset > shrink_start &&
2802                     found_key.objectid < shrink_last_byte) {
2803                         cur_byte = found_key.objectid;
2804                         key.objectid = cur_byte;
2805                 }
2806         }
2807         btrfs_release_path(root, path);
2808
2809         while(1) {
2810                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2811                 if (ret < 0)
2812                         goto out;
2813
2814                 leaf = path->nodes[0];
2815                 nritems = btrfs_header_nritems(leaf);
2816 next:
2817                 if (path->slots[0] >= nritems) {
2818                         ret = btrfs_next_leaf(root, path);
2819                         if (ret < 0)
2820                                 goto out;
2821                         if (ret == 1) {
2822                                 ret = 0;
2823                                 break;
2824                         }
2825                         leaf = path->nodes[0];
2826                         nritems = btrfs_header_nritems(leaf);
2827                 }
2828
2829                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2830
2831                 if (found_key.objectid >= shrink_last_byte)
2832                         break;
2833
2834                 if (progress && need_resched()) {
2835                         memcpy(&key, &found_key, sizeof(key));
2836                         mutex_unlock(&root->fs_info->fs_mutex);
2837                         cond_resched();
2838                         mutex_lock(&root->fs_info->fs_mutex);
2839                         btrfs_release_path(root, path);
2840                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
2841                         progress = 0;
2842                         goto next;
2843                 }
2844                 progress = 1;
2845
2846                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
2847                     found_key.objectid + found_key.offset <= cur_byte) {
2848                         path->slots[0]++;
2849                         goto next;
2850                 }
2851
2852                 total_found++;
2853                 cur_byte = found_key.objectid + found_key.offset;
2854                 key.objectid = cur_byte;
2855                 btrfs_release_path(root, path);
2856                 ret = relocate_one_extent(root, path, &found_key);
2857         }
2858
2859         btrfs_release_path(root, path);
2860
2861         if (total_found > 0) {
2862                 trans = btrfs_start_transaction(tree_root, 1);
2863                 btrfs_commit_transaction(trans, tree_root);
2864
2865                 mutex_unlock(&root->fs_info->fs_mutex);
2866                 btrfs_clean_old_snapshots(tree_root);
2867                 mutex_lock(&root->fs_info->fs_mutex);
2868
2869                 trans = btrfs_start_transaction(tree_root, 1);
2870                 btrfs_commit_transaction(trans, tree_root);
2871                 goto again;
2872         }
2873
2874         /*
2875          * we've freed all the extents, now remove the block
2876          * group item from the tree
2877          */
2878         trans = btrfs_start_transaction(root, 1);
2879         memcpy(&key, &shrink_block_group->key, sizeof(key));
2880
2881         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2882         if (ret > 0)
2883                 ret = -EIO;
2884         if (ret < 0)
2885                 goto out;
2886
2887         leaf = path->nodes[0];
2888         nritems = btrfs_header_nritems(leaf);
2889         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2890         kfree(shrink_block_group);
2891
2892         clear_extent_bits(&info->block_group_cache, found_key.objectid,
2893                           found_key.objectid + found_key.offset - 1,
2894                           (unsigned int)-1, GFP_NOFS);
2895
2896         btrfs_del_item(trans, root, path);
2897         clear_extent_dirty(&info->free_space_cache,
2898                            shrink_start, shrink_last_byte - 1,
2899                            GFP_NOFS);
2900         btrfs_commit_transaction(trans, root);
2901 out:
2902         btrfs_free_path(path);
2903         return ret;
2904 }
2905
2906 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2907                            struct btrfs_key *key)
2908 {
2909         int ret;
2910         struct btrfs_key found_key;
2911         struct extent_buffer *leaf;
2912         int slot;
2913
2914         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2915         if (ret < 0)
2916                 return ret;
2917         while(1) {
2918                 slot = path->slots[0];
2919                 leaf = path->nodes[0];
2920                 if (slot >= btrfs_header_nritems(leaf)) {
2921                         ret = btrfs_next_leaf(root, path);
2922                         if (ret == 0)
2923                                 continue;
2924                         if (ret < 0)
2925                                 goto error;
2926                         break;
2927                 }
2928                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2929
2930                 if (found_key.objectid >= key->objectid &&
2931                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2932                         return 0;
2933                 path->slots[0]++;
2934         }
2935         ret = -ENOENT;
2936 error:
2937         return ret;
2938 }
2939
2940 int btrfs_read_block_groups(struct btrfs_root *root)
2941 {
2942         struct btrfs_path *path;
2943         int ret;
2944         int bit;
2945         struct btrfs_block_group_cache *cache;
2946         struct btrfs_fs_info *info = root->fs_info;
2947         struct btrfs_space_info *space_info;
2948         struct extent_io_tree *block_group_cache;
2949         struct btrfs_key key;
2950         struct btrfs_key found_key;
2951         struct extent_buffer *leaf;
2952
2953         block_group_cache = &info->block_group_cache;
2954         root = info->extent_root;
2955         key.objectid = 0;
2956         key.offset = 0;
2957         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2958         path = btrfs_alloc_path();
2959         if (!path)
2960                 return -ENOMEM;
2961
2962         while(1) {
2963                 ret = find_first_block_group(root, path, &key);
2964                 if (ret > 0) {
2965                         ret = 0;
2966                         goto error;
2967                 }
2968                 if (ret != 0)
2969                         goto error;
2970
2971                 leaf = path->nodes[0];
2972                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2973                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
2974                 if (!cache) {
2975                         ret = -ENOMEM;
2976                         break;
2977                 }
2978
2979                 read_extent_buffer(leaf, &cache->item,
2980                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2981                                    sizeof(cache->item));
2982                 memcpy(&cache->key, &found_key, sizeof(found_key));
2983
2984                 key.objectid = found_key.objectid + found_key.offset;
2985                 btrfs_release_path(root, path);
2986                 cache->flags = btrfs_block_group_flags(&cache->item);
2987                 bit = 0;
2988                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2989                         bit = BLOCK_GROUP_DATA;
2990                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2991                         bit = BLOCK_GROUP_SYSTEM;
2992                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2993                         bit = BLOCK_GROUP_METADATA;
2994                 }
2995                 set_avail_alloc_bits(info, cache->flags);
2996
2997                 ret = update_space_info(info, cache->flags, found_key.offset,
2998                                         btrfs_block_group_used(&cache->item),
2999                                         &space_info);
3000                 BUG_ON(ret);
3001                 cache->space_info = space_info;
3002
3003                 /* use EXTENT_LOCKED to prevent merging */
3004                 set_extent_bits(block_group_cache, found_key.objectid,
3005                                 found_key.objectid + found_key.offset - 1,
3006                                 bit | EXTENT_LOCKED, GFP_NOFS);
3007                 set_state_private(block_group_cache, found_key.objectid,
3008                                   (unsigned long)cache);
3009
3010                 if (key.objectid >=
3011                     btrfs_super_total_bytes(&info->super_copy))
3012                         break;
3013         }
3014         ret = 0;
3015 error:
3016         btrfs_free_path(path);
3017         return ret;
3018 }
3019
3020 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3021                            struct btrfs_root *root, u64 bytes_used,
3022                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3023                            u64 size)
3024 {
3025         int ret;
3026         int bit = 0;
3027         struct btrfs_root *extent_root;
3028         struct btrfs_block_group_cache *cache;
3029         struct extent_io_tree *block_group_cache;
3030
3031         extent_root = root->fs_info->extent_root;
3032         block_group_cache = &root->fs_info->block_group_cache;
3033
3034         cache = kzalloc(sizeof(*cache), GFP_NOFS);
3035         BUG_ON(!cache);
3036         cache->key.objectid = chunk_offset;
3037         cache->key.offset = size;
3038
3039         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3040         memset(&cache->item, 0, sizeof(cache->item));
3041         btrfs_set_block_group_used(&cache->item, bytes_used);
3042         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3043         cache->flags = type;
3044         btrfs_set_block_group_flags(&cache->item, type);
3045
3046         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3047                                 &cache->space_info);
3048         BUG_ON(ret);
3049
3050         bit = block_group_state_bits(type);
3051         set_extent_bits(block_group_cache, chunk_offset,
3052                         chunk_offset + size - 1,
3053                         bit | EXTENT_LOCKED, GFP_NOFS);
3054
3055         set_state_private(block_group_cache, chunk_offset,
3056                           (unsigned long)cache);
3057         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3058                                 sizeof(cache->item));
3059         BUG_ON(ret);
3060
3061         finish_current_insert(trans, extent_root);
3062         ret = del_pending_extents(trans, extent_root);
3063         BUG_ON(ret);
3064         set_avail_alloc_bits(extent_root->fs_info, type);
3065         return 0;
3066 }