Btrfs: Add shared reference cache
fs/btrfs/extent-tree.c
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include "hash.h"
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "ref-cache.h"

#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2

struct pending_extent_op {
        int type;
        u64 bytenr;
        u64 num_bytes;
        u64 parent;
        u64 orig_parent;
        u64 generation;
        u64 orig_generation;
        int level;
};

static int finish_current_insert(struct btrfs_trans_handle *trans, struct
                                 btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
                               btrfs_root *extent_root);
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
                         struct btrfs_block_group_cache *hint,
                         u64 search_start, int data, int owner);

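/*
 * helpers for alloc_mutex: callers working on the extent, chunk or dev
 * roots are expected to already hold alloc_mutex, so only take or drop
 * the mutex when operating on any other root.
 */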
void maybe_lock_mutex(struct btrfs_root *root)
{
        if (root != root->fs_info->extent_root &&
            root != root->fs_info->chunk_root &&
            root != root->fs_info->dev_root) {
                mutex_lock(&root->fs_info->alloc_mutex);
        }
}

void maybe_unlock_mutex(struct btrfs_root *root)
{
        if (root != root->fs_info->extent_root &&
            root != root->fs_info->chunk_root &&
            root != root->fs_info->dev_root) {
                mutex_unlock(&root->fs_info->alloc_mutex);
        }
}

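/* return true only if the block group's flags include all of the given bits */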
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
        return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
                                struct btrfs_block_group_cache *block_group)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct btrfs_block_group_cache *cache;

        spin_lock(&info->block_group_cache_lock);
        p = &info->block_group_cache_tree.rb_node;

        while (*p) {
                parent = *p;
                cache = rb_entry(parent, struct btrfs_block_group_cache,
                                 cache_node);
                if (block_group->key.objectid < cache->key.objectid) {
                        p = &(*p)->rb_left;
                } else if (block_group->key.objectid > cache->key.objectid) {
                        p = &(*p)->rb_right;
                } else {
                        spin_unlock(&info->block_group_cache_lock);
                        return -EEXIST;
                }
        }

        rb_link_node(&block_group->cache_node, parent, p);
        rb_insert_color(&block_group->cache_node,
                        &info->block_group_cache_tree);
        spin_unlock(&info->block_group_cache_lock);

        return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
                              int contains)
{
        struct btrfs_block_group_cache *cache, *ret = NULL;
        struct rb_node *n;
        u64 end, start;

        spin_lock(&info->block_group_cache_lock);
        n = info->block_group_cache_tree.rb_node;

        while (n) {
                cache = rb_entry(n, struct btrfs_block_group_cache,
                                 cache_node);
                end = cache->key.objectid + cache->key.offset - 1;
                start = cache->key.objectid;

                if (bytenr < start) {
                        if (!contains && (!ret || start < ret->key.objectid))
                                ret = cache;
                        n = n->rb_left;
                } else if (bytenr > start) {
                        if (contains && bytenr <= end) {
                                ret = cache;
                                break;
                        }
                        n = n->rb_right;
                } else {
                        ret = cache;
                        break;
                }
        }
        spin_unlock(&info->block_group_cache_lock);

        return ret;
}

/*
 * this is only called by cache_block_group.  since we could have freed
 * extents, we need to check the pinned_extents for any extents that can't
 * be used yet, since their free space will be released as soon as the
 * transaction commits.
 */
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
                              struct btrfs_fs_info *info, u64 start, u64 end)
{
        u64 extent_start, extent_end, size;
        int ret;

        while (start < end) {
                ret = find_first_extent_bit(&info->pinned_extents, start,
                                            &extent_start, &extent_end,
                                            EXTENT_DIRTY);
                if (ret)
                        break;

                if (extent_start == start) {
                        start = extent_end + 1;
                } else if (extent_start > start && extent_start < end) {
                        size = extent_start - start;
                        ret = btrfs_add_free_space(block_group, start, size);
                        BUG_ON(ret);
                        start = extent_end + 1;
                } else {
                        break;
                }
        }

        if (start < end) {
                size = end - start;
                ret = btrfs_add_free_space(block_group, start, size);
                BUG_ON(ret);
        }

        return 0;
}

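/*
 * fill in the free space cache for a block group by walking the extent
 * tree and recording the gaps between allocated extents.  pinned ranges
 * are filtered out by add_new_free_space above.
 */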
static int cache_block_group(struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group)
{
        struct btrfs_path *path;
        int ret = 0;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        int slot;
        u64 last = 0;
        u64 first_free;
        int found = 0;

        if (!block_group)
                return 0;

        root = root->fs_info->extent_root;

        if (block_group->cached)
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->reada = 2;
        /*
         * we get into deadlocks with paths held by callers of this function.
         * since the alloc_mutex is protecting things right now, just
         * skip the locking here
         */
        path->skip_locking = 1;
        first_free = max_t(u64, block_group->key.objectid,
                           BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
        key.objectid = block_group->key.objectid;
        key.offset = 0;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
        ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
        if (ret < 0)
                goto err;
        if (ret == 0) {
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid + key.offset > first_free)
                        first_free = key.objectid + key.offset;
        }
        while (1) {
                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto err;
                        if (ret == 0)
                                continue;
                        else
                                break;
                }
                btrfs_item_key_to_cpu(leaf, &key, slot);
                if (key.objectid < block_group->key.objectid)
                        goto next;

                if (key.objectid >= block_group->key.objectid +
                    block_group->key.offset)
                        break;

                if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
                        if (!found) {
                                last = first_free;
                                found = 1;
                        }

                        add_new_free_space(block_group, root->fs_info, last,
                                           key.objectid);

                        last = key.objectid + key.offset;
                }
next:
                path->slots[0]++;
        }

        if (!found)
                last = first_free;

        add_new_free_space(block_group, root->fs_info, last,
                           block_group->key.objectid +
                           block_group->key.offset);

        block_group->cached = 1;
        ret = 0;
err:
        btrfs_free_path(path);
        return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
                                                       btrfs_fs_info *info,
                                                       u64 bytenr)
{
        struct btrfs_block_group_cache *cache;

        cache = block_group_cache_tree_search(info, bytenr, 0);

        return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
                                                         btrfs_fs_info *info,
                                                         u64 bytenr)
{
        struct btrfs_block_group_cache *cache;

        cache = block_group_cache_tree_search(info, bytenr, 1);

        return cache;
}

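/*
 * starting from *cache_ret, scan forward through the block groups for a
 * free slice of at least num bytes that matches the allocation profile in
 * data, returning its start in *start_ret.
 */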
static int noinline find_free_space(struct btrfs_root *root,
                                    struct btrfs_block_group_cache **cache_ret,
                                    u64 *start_ret, u64 num, int data)
{
        int ret;
        struct btrfs_block_group_cache *cache = *cache_ret;
        struct btrfs_free_space *info = NULL;
        u64 last;
        u64 search_start = *start_ret;

        WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
        if (!cache)
                goto out;

        last = max(search_start, cache->key.objectid);

again:
        ret = cache_block_group(root, cache);
        if (ret)
                goto out;

        if (cache->ro || !block_group_bits(cache, data))
                goto new_group;

        info = btrfs_find_free_space(cache, last, num);
        if (info) {
                *start_ret = info->offset;
                return 0;
        }

new_group:
        last = cache->key.objectid + cache->key.offset;

        cache = btrfs_lookup_first_block_group(root->fs_info, last);
        if (!cache)
                goto out;

        *cache_ret = cache;
        goto again;

out:
        return -ENOSPC;
}

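/* scale num by factor tenths, e.g. div_factor(1000, 9) == 900 */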
static u64 div_factor(u64 num, int factor)
{
        if (factor == 10)
                return num;
        num *= factor;
        do_div(num, 10);
        return num;
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
                                                  u64 flags)
{
        struct list_head *head = &info->space_info;
        struct list_head *cur;
        struct btrfs_space_info *found;
        list_for_each(cur, head) {
                found = list_entry(cur, struct btrfs_space_info, list);
                if (found->flags == flags)
                        return found;
        }
        return NULL;
}

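/*
 * pick a block group with room for a new allocation.  the search hint and
 * the caller supplied hint are tried first, then the groups are scanned
 * in address order, accepting the first one that is less than factor
 * tenths full.  metadata searches start with factor == 9 and retry a full
 * search with factor == 10 before giving up.
 */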
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
                         struct btrfs_block_group_cache *hint,
                         u64 search_start, int data, int owner)
{
        struct btrfs_block_group_cache *cache;
        struct btrfs_block_group_cache *found_group = NULL;
        struct btrfs_fs_info *info = root->fs_info;
        u64 used;
        u64 last = 0;
        u64 free_check;
        int full_search = 0;
        int factor = 10;
        int wrapped = 0;

        if (data & BTRFS_BLOCK_GROUP_METADATA)
                factor = 9;

        if (search_start) {
                struct btrfs_block_group_cache *shint;
                shint = btrfs_lookup_first_block_group(info, search_start);
                if (shint && block_group_bits(shint, data) && !shint->ro) {
                        spin_lock(&shint->lock);
                        used = btrfs_block_group_used(&shint->item);
                        if (used + shint->pinned + shint->reserved <
                            div_factor(shint->key.offset, factor)) {
                                spin_unlock(&shint->lock);
                                return shint;
                        }
                        spin_unlock(&shint->lock);
                }
        }
        if (hint && !hint->ro && block_group_bits(hint, data)) {
                spin_lock(&hint->lock);
                used = btrfs_block_group_used(&hint->item);
                if (used + hint->pinned + hint->reserved <
                    div_factor(hint->key.offset, factor)) {
                        spin_unlock(&hint->lock);
                        return hint;
                }
                spin_unlock(&hint->lock);
                last = hint->key.objectid + hint->key.offset;
        } else {
                if (hint)
                        last = max(hint->key.objectid, search_start);
                else
                        last = search_start;
        }
again:
        while (1) {
                cache = btrfs_lookup_first_block_group(root->fs_info, last);
                if (!cache)
                        break;

                spin_lock(&cache->lock);
                last = cache->key.objectid + cache->key.offset;
                used = btrfs_block_group_used(&cache->item);

                if (!cache->ro && block_group_bits(cache, data)) {
                        free_check = div_factor(cache->key.offset, factor);
                        if (used + cache->pinned + cache->reserved <
                            free_check) {
                                found_group = cache;
                                spin_unlock(&cache->lock);
                                goto found;
                        }
                }
                spin_unlock(&cache->lock);
                cond_resched();
        }
        if (!wrapped) {
                last = search_start;
                wrapped = 1;
                goto again;
        }
        if (!full_search && factor < 10) {
                last = search_start;
                full_search = 1;
                factor = 10;
                goto again;
        }
found:
        return found_group;
}

struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                                                 struct btrfs_block_group_cache
                                                 *hint, u64 search_start,
                                                 int data, int owner)
{
        struct btrfs_block_group_cache *ret;
        ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
        return ret;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
        int ret;
        struct btrfs_key key;
        struct btrfs_path *path;

        path = btrfs_alloc_path();
        BUG_ON(!path);
        maybe_lock_mutex(root);
        key.objectid = start;
        key.offset = len;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
                                0, 0);
        maybe_unlock_mutex(root);
        btrfs_free_path(path);
        return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - offset in the file corresponding to the key holding the reference
 * - number of references held by the parent node (always 1 for tree blocks)
 *
 * A btree leaf may hold multiple references to a file extent. In most cases
 * these references are from the same file and the corresponding offsets
 * inside the file are close together. The inode objectid and offset in file
 * are just hints; they tell us where in the btree the references can be
 * found and when we can stop searching.
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, offset in file, 1)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks similar to the create case, but trans->transid will
 * be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid, offset in file,
 *      number of references in the leaf)
 *
 * Because inode objectid and offset in file are just hints, they are not
 * used when backrefs are deleted. When a file extent is removed either
 * during snapshot deletion or file truncation, we find the corresponding
 * back reference and check the following fields.
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf))
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * When a tree block is created, back references are inserted:
 *
 * (root->root_key.objectid, trans->transid, level, 0, 1)
 *
 * When a tree block is cow'd, new back references are added for all the
 * blocks it points to. If the tree block isn't in a reference counted root,
 * the old back references are removed. These new back references are of
 * the form (trans->transid will have increased since creation):
 *
 * (root->root_key.objectid, trans->transid, level, 0, 1)
 *
 * When a backref is deleted, the following fields are checked:
 *
 * if backref was for a tree root:
 *     (btrfs_header_owner(itself), btrfs_header_generation(itself))
 * else
 *     (btrfs_header_owner(parent), btrfs_header_generation(parent))
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent, the key
 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 * byte of the parent extent. If an extent is a tree root, the key offset
 * is set to the key objectid.
 */

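/*
 * An illustrative sketch (not code from this file) of composing a backref
 * key per the rules above.  extent_bytenr and parent_bytenr stand for the
 * first bytes of the extent and of its parent:
 *
 *     struct btrfs_key key;
 *
 *     key.objectid = extent_bytenr;
 *     key.type = BTRFS_EXTENT_REF_KEY;
 *     key.offset = is_tree_root ? extent_bytenr : parent_bytenr;
 *
 * lookup_extent_backref() below builds this key before searching.
 */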
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path, u64 bytenr,
                                          u64 parent, u64 ref_root,
                                          u64 ref_generation, int del)
{
        struct btrfs_key key;
        struct btrfs_extent_ref *ref;
        struct extent_buffer *leaf;
        int ret;

        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_REF_KEY;
        key.offset = parent;

        ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
        if (ret < 0)
                goto out;
        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        leaf = path->nodes[0];
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
        if (btrfs_ref_root(leaf, ref) != ref_root ||
            btrfs_ref_generation(leaf, ref) != ref_generation) {
                ret = -EIO;
                WARN_ON(1);
                goto out;
        }
        ret = 0;
out:
        return ret;
}

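/*
 * insert a new backref item, or, if one already exists for this
 * (bytenr, parent) pair, bump its refcount.  the owner objectid collapses
 * to BTRFS_MULTIPLE_OBJECTIDS once refs from different files share it.
 */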
static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 ref_root, u64 ref_generation,
                                          u64 owner_objectid, u64 owner_offset)
{
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_extent_ref *ref;
        u32 num_refs;
        int ret;

        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_REF_KEY;
        key.offset = parent;

        ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
        if (ret == 0) {
                leaf = path->nodes[0];
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_ref);
                btrfs_set_ref_root(leaf, ref, ref_root);
                btrfs_set_ref_generation(leaf, ref, ref_generation);
                btrfs_set_ref_objectid(leaf, ref, owner_objectid);
                btrfs_set_ref_offset(leaf, ref, owner_offset);
                btrfs_set_ref_num_refs(leaf, ref, 1);
        } else if (ret == -EEXIST) {
                u64 existing_owner;
                BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
                leaf = path->nodes[0];
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_ref);
                if (btrfs_ref_root(leaf, ref) != ref_root ||
                    btrfs_ref_generation(leaf, ref) != ref_generation) {
                        ret = -EIO;
                        WARN_ON(1);
                        goto out;
                }

                num_refs = btrfs_ref_num_refs(leaf, ref);
                BUG_ON(num_refs == 0);
                btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);

                existing_owner = btrfs_ref_objectid(leaf, ref);
                if (existing_owner == owner_objectid &&
                    btrfs_ref_offset(leaf, ref) > owner_offset) {
                        btrfs_set_ref_offset(leaf, ref, owner_offset);
                } else if (existing_owner != owner_objectid &&
                           existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
                        btrfs_set_ref_objectid(leaf, ref,
                                        BTRFS_MULTIPLE_OBJECTIDS);
                        btrfs_set_ref_offset(leaf, ref, 0);
                }
                ret = 0;
        } else {
                goto out;
        }
        btrfs_mark_buffer_dirty(path->nodes[0]);
out:
        btrfs_release_path(root, path);
        return ret;
}

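/*
 * drop one reference from a backref item, deleting the item once its
 * refcount reaches zero.
 */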
static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path)
{
        struct extent_buffer *leaf;
        struct btrfs_extent_ref *ref;
        u32 num_refs;
        int ret = 0;

        leaf = path->nodes[0];
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
        num_refs = btrfs_ref_num_refs(leaf, ref);
        BUG_ON(num_refs == 0);
        num_refs -= 1;
        if (num_refs == 0) {
                ret = btrfs_del_item(trans, root, path);
        } else {
                btrfs_set_ref_num_refs(leaf, ref, num_refs);
                btrfs_mark_buffer_dirty(leaf);
        }
        btrfs_release_path(root, path);
        return ret;
}

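/*
 * move a backref from orig_parent to parent.  updates against the extent
 * root itself are deferred as PENDING_BACKREF_UPDATE ops in the extent_ins
 * tree; for all other roots the old backref is removed and a new one
 * inserted immediately.
 */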
static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root, u64 bytenr,
                                     u64 orig_parent, u64 parent,
                                     u64 orig_root, u64 ref_root,
                                     u64 orig_generation, u64 ref_generation,
                                     u64 owner_objectid, u64 owner_offset)
{
        int ret;
        struct btrfs_root *extent_root = root->fs_info->extent_root;
        struct btrfs_path *path;

        if (root == root->fs_info->extent_root) {
                struct pending_extent_op *extent_op;
                u64 num_bytes;

                BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
                num_bytes = btrfs_level_size(root, (int)owner_objectid);
                if (test_range_bit(&root->fs_info->extent_ins, bytenr,
                                bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) {
                        u64 priv;
                        ret = get_state_private(&root->fs_info->extent_ins,
                                                bytenr, &priv);
                        BUG_ON(ret);
                        extent_op = (struct pending_extent_op *)
                                                        (unsigned long)priv;
                        BUG_ON(extent_op->parent != orig_parent);
                        BUG_ON(extent_op->generation != orig_generation);
                        extent_op->parent = parent;
                        extent_op->generation = ref_generation;
                } else {
                        extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
                        BUG_ON(!extent_op);

                        extent_op->type = PENDING_BACKREF_UPDATE;
                        extent_op->bytenr = bytenr;
                        extent_op->num_bytes = num_bytes;
                        extent_op->parent = parent;
                        extent_op->orig_parent = orig_parent;
                        extent_op->generation = ref_generation;
                        extent_op->orig_generation = orig_generation;
                        extent_op->level = (int)owner_objectid;

                        set_extent_bits(&root->fs_info->extent_ins,
                                        bytenr, bytenr + num_bytes - 1,
                                        EXTENT_LOCKED, GFP_NOFS);
                        set_state_private(&root->fs_info->extent_ins,
                                          bytenr, (unsigned long)extent_op);
                }
                return 0;
        }

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        ret = lookup_extent_backref(trans, extent_root, path,
                                    bytenr, orig_parent, orig_root,
                                    orig_generation, 1);
        if (ret)
                goto out;
        ret = remove_extent_backref(trans, extent_root, path);
        if (ret)
                goto out;
        ret = insert_extent_backref(trans, extent_root, path, bytenr,
                                    parent, ref_root, ref_generation,
                                    owner_objectid, owner_offset);
        BUG_ON(ret);
        finish_current_insert(trans, extent_root);
        del_pending_extents(trans, extent_root);
out:
        btrfs_free_path(path);
        return ret;
}

int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 bytenr,
                            u64 orig_parent, u64 parent,
                            u64 ref_root, u64 ref_generation,
                            u64 owner_objectid, u64 owner_offset)
{
        int ret;
        if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
            owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
                return 0;
        maybe_lock_mutex(root);
        ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
                                        parent, ref_root, ref_root,
                                        ref_generation, ref_generation,
                                        owner_objectid, owner_offset);
        maybe_unlock_mutex(root);
        return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root, u64 bytenr,
                                  u64 orig_parent, u64 parent,
                                  u64 orig_root, u64 ref_root,
                                  u64 orig_generation, u64 ref_generation,
                                  u64 owner_objectid, u64 owner_offset)
{
        struct btrfs_path *path;
        int ret;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_extent_item *item;
        u32 refs;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->reada = 1;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

        ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
                                0, 1);
        if (ret < 0)
                return ret;
        BUG_ON(ret == 0 || path->slots[0] == 0);

        path->slots[0]--;
        l = path->nodes[0];

        btrfs_item_key_to_cpu(l, &key, path->slots[0]);
        BUG_ON(key.objectid != bytenr);
        BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);

        item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(l, item);
        btrfs_set_extent_refs(l, item, refs + 1);
        btrfs_mark_buffer_dirty(path->nodes[0]);

        btrfs_release_path(root->fs_info->extent_root, path);

        path->reada = 1;
        ret = insert_extent_backref(trans, root->fs_info->extent_root,
                                    path, bytenr, parent,
                                    ref_root, ref_generation,
                                    owner_objectid, owner_offset);
        BUG_ON(ret);
        finish_current_insert(trans, root->fs_info->extent_root);
        del_pending_extents(trans, root->fs_info->extent_root);

        btrfs_free_path(path);
        return 0;
}

int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 bytenr, u64 num_bytes, u64 parent,
                         u64 ref_root, u64 ref_generation,
                         u64 owner_objectid, u64 owner_offset)
{
        int ret;
        if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
            owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
                return 0;
        maybe_lock_mutex(root);
        ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
                                     0, ref_root, 0, ref_generation,
                                     owner_objectid, owner_offset);
        maybe_unlock_mutex(root);
        return ret;
}

int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root)
{
        finish_current_insert(trans, root->fs_info->extent_root);
        del_pending_extents(trans, root->fs_info->extent_root);
        return 0;
}

int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 bytenr,
                            u64 num_bytes, u32 *refs)
{
        struct btrfs_path *path;
        int ret;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_extent_item *item;

        WARN_ON(num_bytes < root->sectorsize);
        path = btrfs_alloc_path();
        path->reada = 1;
        key.objectid = bytenr;
        key.offset = num_bytes;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
                                0, 0);
        if (ret < 0)
                goto out;
        if (ret != 0) {
                btrfs_print_leaf(root, path->nodes[0]);
                printk("failed to find block number %Lu\n", bytenr);
                BUG();
        }
        l = path->nodes[0];
        item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
        *refs = btrfs_extent_refs(l, item);
out:
        btrfs_free_path(path);
        return 0;
}

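/*
 * count the references on an extent for this root, stopping early with
 * *ref_count == 2 as soon as a reference from another root, from an older
 * generation, or from a different file is found.  *min_generation tracks
 * the oldest matching reference seen.
 */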
static int get_reference_status(struct btrfs_root *root, u64 bytenr,
                                u64 parent_gen, u64 ref_objectid,
                                u64 *min_generation, u32 *ref_count)
{
        struct btrfs_root *extent_root = root->fs_info->extent_root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_extent_ref *ref_item;
        struct btrfs_key key;
        struct btrfs_key found_key;
        u64 root_objectid = root->root_key.objectid;
        u64 ref_generation;
        u32 nritems;
        int ret;

        key.objectid = bytenr;
        key.offset = (u64)-1;
        key.type = BTRFS_EXTENT_ITEM_KEY;

        path = btrfs_alloc_path();
        mutex_lock(&root->fs_info->alloc_mutex);
        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        BUG_ON(ret == 0);
        if (ret < 0 || path->slots[0] == 0)
                goto out;

        path->slots[0]--;
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

        if (found_key.objectid != bytenr ||
            found_key.type != BTRFS_EXTENT_ITEM_KEY) {
                ret = 1;
                goto out;
        }

        *ref_count = 0;
        *min_generation = (u64)-1;

        while (1) {
                leaf = path->nodes[0];
                nritems = btrfs_header_nritems(leaf);
                if (path->slots[0] >= nritems) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                goto out;
                        if (ret == 0)
                                continue;
                        break;
                }
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                if (found_key.objectid != bytenr)
                        break;

                if (found_key.type != BTRFS_EXTENT_REF_KEY) {
                        path->slots[0]++;
                        continue;
                }

                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref);
                ref_generation = btrfs_ref_generation(leaf, ref_item);
                /*
                 * For (parent_gen > 0 && parent_gen > ref_generation):
                 *
                 * we reach here through the oldest root, therefore
                 * all other references from the same snapshot should
                 * have a larger generation.
                 */
                if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
                    (parent_gen > 0 && parent_gen > ref_generation) ||
                    (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
                     ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
                        *ref_count = 2;
                        break;
                }

                *ref_count = 1;
                if (*min_generation > ref_generation)
                        *min_generation = ref_generation;

                path->slots[0]++;
        }
        ret = 0;
out:
        mutex_unlock(&root->fs_info->alloc_mutex);
        btrfs_free_path(path);
        return ret;
}

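/*
 * check whether the file extent at bytenr is referenced from outside the
 * snapshot that key belongs to, walking from the leaf up to the root and
 * counting references at each level.
 */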
int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_key *key, u64 bytenr)
{
        struct btrfs_root *old_root;
        struct btrfs_path *path = NULL;
        struct extent_buffer *eb;
        struct btrfs_file_extent_item *item;
        u64 ref_generation;
        u64 min_generation;
        u64 extent_start;
        u32 ref_count;
        int level;
        int ret;

        BUG_ON(trans == NULL);
        BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
        ret = get_reference_status(root, bytenr, 0, key->objectid,
                                   &min_generation, &ref_count);
        if (ret)
                return ret;

        if (ref_count != 1)
                return 1;

        old_root = root->dirty_root->root;
        ref_generation = old_root->root_key.offset;

        /* all references are created in the running transaction */
        if (min_generation > ref_generation) {
                ret = 0;
                goto out;
        }

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        path->skip_locking = 1;
        /* if no item is found, the extent is referenced by another snapshot */
        ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
        if (ret)
                goto out;

        eb = path->nodes[0];
        item = btrfs_item_ptr(eb, path->slots[0],
                              struct btrfs_file_extent_item);
        if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
            btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
                ret = 1;
                goto out;
        }

        for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
                if (level >= 0) {
                        eb = path->nodes[level];
                        if (!eb)
                                continue;
                        extent_start = eb->start;
                } else
                        extent_start = bytenr;

                ret = get_reference_status(root, extent_start, ref_generation,
                                           0, &min_generation, &ref_count);
                if (ret)
                        goto out;

                if (ref_count != 1) {
                        ret = 1;
                        goto out;
                }
                if (level >= 0)
                        ref_generation = btrfs_header_generation(eb);
        }
        ret = 0;
out:
        if (path)
                btrfs_free_path(path);
        return ret;
}

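/*
 * populate the leaf ref cache with the file extents found in a leaf.
 * refs built for the tree relocation root are added as shared entries,
 * keyed by the previous transid.
 */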
int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                    struct extent_buffer *buf, u32 nr_extents)
{
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        u64 root_gen;
        u32 nritems;
        int i;
        int level;
        int ret = 0;
        int shared = 0;

        if (!root->ref_cows)
                return 0;

        if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                shared = 0;
                root_gen = root->root_key.offset;
        } else {
                shared = 1;
                root_gen = trans->transid - 1;
        }

        level = btrfs_header_level(buf);
        nritems = btrfs_header_nritems(buf);

        if (level == 0) {
                struct btrfs_leaf_ref *ref;
                struct btrfs_extent_info *info;

                ref = btrfs_alloc_leaf_ref(root, nr_extents);
                if (!ref) {
                        ret = -ENOMEM;
                        goto out;
                }

                ref->root_gen = root_gen;
                ref->bytenr = buf->start;
                ref->owner = btrfs_header_owner(buf);
                ref->generation = btrfs_header_generation(buf);
                ref->nritems = nr_extents;
                info = ref->extents;

                for (i = 0; nr_extents > 0 && i < nritems; i++) {
                        u64 disk_bytenr;
                        btrfs_item_key_to_cpu(buf, &key, i);
                        if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
                                continue;
                        fi = btrfs_item_ptr(buf, i,
                                            struct btrfs_file_extent_item);
                        if (btrfs_file_extent_type(buf, fi) ==
                            BTRFS_FILE_EXTENT_INLINE)
                                continue;
                        disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
                        if (disk_bytenr == 0)
                                continue;

                        info->bytenr = disk_bytenr;
                        info->num_bytes =
                                btrfs_file_extent_disk_num_bytes(buf, fi);
                        info->objectid = key.objectid;
                        info->offset = key.offset;
                        info++;
                }

                ret = btrfs_add_leaf_ref(root, ref, shared);
                WARN_ON(ret);
                btrfs_free_leaf_ref(root, ref);
        }
out:
        return ret;
}

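/*
 * after a cow, add or update backrefs for everything the new buffer points
 * to: one per file extent in a leaf, one per child block in a node.  for
 * roots that are not reference counted, the existing backrefs are only
 * moved to the new parent.
 */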
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                  struct extent_buffer *orig_buf, struct extent_buffer *buf,
                  u32 *nr_extents)
{
        u64 bytenr;
        u64 ref_root;
        u64 orig_root;
        u64 ref_generation;
        u64 orig_generation;
        u32 nritems;
        u32 nr_file_extents = 0;
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        int i;
        int level;
        int ret = 0;
        int faili = 0;
        int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
                            u64, u64, u64, u64, u64, u64, u64, u64, u64);

        ref_root = btrfs_header_owner(buf);
        ref_generation = btrfs_header_generation(buf);
        orig_root = btrfs_header_owner(orig_buf);
        orig_generation = btrfs_header_generation(orig_buf);

        nritems = btrfs_header_nritems(buf);
        level = btrfs_header_level(buf);

        if (root->ref_cows) {
                process_func = __btrfs_inc_extent_ref;
        } else {
                if (level == 0 &&
                    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
                        goto out;
                if (level != 0 &&
                    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
                        goto out;
                process_func = __btrfs_update_extent_ref;
        }

        for (i = 0; i < nritems; i++) {
                cond_resched();
                if (level == 0) {
                        btrfs_item_key_to_cpu(buf, &key, i);
                        if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
                                continue;
                        fi = btrfs_item_ptr(buf, i,
                                            struct btrfs_file_extent_item);
                        if (btrfs_file_extent_type(buf, fi) ==
                            BTRFS_FILE_EXTENT_INLINE)
                                continue;
                        bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
                        if (bytenr == 0)
                                continue;

                        nr_file_extents++;

                        maybe_lock_mutex(root);
                        ret = process_func(trans, root, bytenr,
                                           orig_buf->start, buf->start,
                                           orig_root, ref_root,
                                           orig_generation, ref_generation,
                                           key.objectid, key.offset);
                        maybe_unlock_mutex(root);

                        if (ret) {
                                faili = i;
                                WARN_ON(1);
                                goto fail;
                        }
                } else {
                        bytenr = btrfs_node_blockptr(buf, i);
                        maybe_lock_mutex(root);
                        ret = process_func(trans, root, bytenr,
                                           orig_buf->start, buf->start,
                                           orig_root, ref_root,
                                           orig_generation, ref_generation,
                                           level - 1, 0);
                        maybe_unlock_mutex(root);
                        if (ret) {
                                faili = i;
                                WARN_ON(1);
                                goto fail;
                        }
                }
        }
out:
        if (nr_extents) {
                if (level == 0)
                        *nr_extents = nr_file_extents;
                else
                        *nr_extents = nritems;
        }
        return 0;
fail:
        WARN_ON(1);
        return ret;
}

int btrfs_update_ref(struct btrfs_trans_handle *trans,
                     struct btrfs_root *root, struct extent_buffer *orig_buf,
                     struct extent_buffer *buf, int start_slot, int nr)
{
        u64 bytenr;
        u64 ref_root;
        u64 orig_root;
        u64 ref_generation;
        u64 orig_generation;
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        int i;
        int ret;
        int slot;
        int level;

        BUG_ON(start_slot < 0);
        BUG_ON(start_slot + nr > btrfs_header_nritems(buf));

        ref_root = btrfs_header_owner(buf);
        ref_generation = btrfs_header_generation(buf);
        orig_root = btrfs_header_owner(orig_buf);
        orig_generation = btrfs_header_generation(orig_buf);
        level = btrfs_header_level(buf);

        if (!root->ref_cows) {
                if (level == 0 &&
                    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
                        return 0;
                if (level != 0 &&
                    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
                        return 0;
        }

        for (i = 0, slot = start_slot; i < nr; i++, slot++) {
                cond_resched();
                if (level == 0) {
                        btrfs_item_key_to_cpu(buf, &key, slot);
                        if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
                                continue;
                        fi = btrfs_item_ptr(buf, slot,
                                            struct btrfs_file_extent_item);
                        if (btrfs_file_extent_type(buf, fi) ==
                            BTRFS_FILE_EXTENT_INLINE)
                                continue;
                        bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
                        if (bytenr == 0)
                                continue;
                        maybe_lock_mutex(root);
                        ret = __btrfs_update_extent_ref(trans, root, bytenr,
                                            orig_buf->start, buf->start,
                                            orig_root, ref_root,
                                            orig_generation, ref_generation,
                                            key.objectid, key.offset);
                        maybe_unlock_mutex(root);
                        if (ret)
                                goto fail;
                } else {
                        bytenr = btrfs_node_blockptr(buf, slot);
                        maybe_lock_mutex(root);
                        ret = __btrfs_update_extent_ref(trans, root, bytenr,
                                            orig_buf->start, buf->start,
                                            orig_root, ref_root,
                                            orig_generation, ref_generation,
                                            level - 1, 0);
                        maybe_unlock_mutex(root);
                        if (ret)
                                goto fail;
                }
        }
        return 0;
fail:
        WARN_ON(1);
        return -1;
}

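/*
 * write a single block group item back into the extent tree at the slot
 * given by its key.
 */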
1340 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1341                                  struct btrfs_root *root,
1342                                  struct btrfs_path *path,
1343                                  struct btrfs_block_group_cache *cache)
1344 {
1345         int ret;
1346         int pending_ret;
1347         struct btrfs_root *extent_root = root->fs_info->extent_root;
1348         unsigned long bi;
1349         struct extent_buffer *leaf;
1350
1351         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1352         if (ret < 0)
1353                 goto fail;
1354         BUG_ON(ret);
1355
1356         leaf = path->nodes[0];
1357         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1358         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1359         btrfs_mark_buffer_dirty(leaf);
1360         btrfs_release_path(extent_root, path);
1361 fail:
1362         finish_current_insert(trans, extent_root);
1363         pending_ret = del_pending_extents(trans, extent_root);
1364         if (ret)
1365                 return ret;
1366         if (pending_ret)
1367                 return pending_ret;
1368         return 0;
1369
1370 }
1371
1372 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1373                                    struct btrfs_root *root)
1374 {
1375         struct btrfs_block_group_cache *cache, *entry;
1376         struct rb_node *n;
1377         int err = 0;
1378         int werr = 0;
1379         struct btrfs_path *path;
1380         u64 last = 0;
1381
1382         path = btrfs_alloc_path();
1383         if (!path)
1384                 return -ENOMEM;
1385
1386         mutex_lock(&root->fs_info->alloc_mutex);
1387         while(1) {
1388                 cache = NULL;
1389                 spin_lock(&root->fs_info->block_group_cache_lock);
1390                 for (n = rb_first(&root->fs_info->block_group_cache_tree);
1391                      n; n = rb_next(n)) {
1392                         entry = rb_entry(n, struct btrfs_block_group_cache,
1393                                          cache_node);
1394                         if (entry->dirty) {
1395                                 cache = entry;
1396                                 break;
1397                         }
1398                 }
1399                 spin_unlock(&root->fs_info->block_group_cache_lock);
1400
1401                 if (!cache)
1402                         break;
1403
1404                 cache->dirty = 0;
1405                 last += cache->key.offset;
1406
1407                 err = write_one_cache_group(trans, root,
1408                                             path, cache);
1409                 /*
1410                  * if we fail to write the cache group, we want
1411                  * to record the error and keep going, so one bad
1412                  * group doesn't prevent the rest from being written
1413                  */
1414                 if (err) {
1415                         werr = err;
1416                         continue;
1417                 }
1418         }
1419         btrfs_free_path(path);
1420         mutex_unlock(&root->fs_info->alloc_mutex);
1421         return werr;
1422 }
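
/*
 * The loop in btrfs_write_dirty_block_groups() restarts from rb_first()
 * on every pass because the block_group_cache spinlock cannot be held
 * across write_one_cache_group(), which sleeps and does IO.  A minimal
 * sketch of the same "pick one under the lock, work on it outside"
 * pattern; the example_* names are hypothetical stand-ins:
 */
#if 0
static void flush_dirty_items(struct example_tree *tree)
{
        struct example_item *item;

        while (1) {
                spin_lock(&tree->lock);
                item = first_dirty_item(tree); /* hypothetical helper */
                if (item)
                        item->dirty = 0;
                spin_unlock(&tree->lock);
                if (!item)
                        break;
                write_item(item);              /* may sleep, does IO */
        }
}
#endif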
1423
1424 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1425                              u64 total_bytes, u64 bytes_used,
1426                              struct btrfs_space_info **space_info)
1427 {
1428         struct btrfs_space_info *found;
1429
1430         found = __find_space_info(info, flags);
1431         if (found) {
1432                 found->total_bytes += total_bytes;
1433                 found->bytes_used += bytes_used;
1434                 found->full = 0;
1435                 *space_info = found;
1436                 return 0;
1437         }
1438         found = kmalloc(sizeof(*found), GFP_NOFS);
1439         if (!found)
1440                 return -ENOMEM;
1441
1442         INIT_LIST_HEAD(&found->block_groups);
1443         spin_lock_init(&found->lock);
1444         found->flags = flags;
1445         found->total_bytes = total_bytes;
1446         found->bytes_used = bytes_used;
1447         found->bytes_pinned = 0;
1448         found->bytes_reserved = 0;
1449         found->full = 0;
1450         found->force_alloc = 0;
             /* publish on the list only once fully initialized */
1451         list_add(&found->list, &info->space_info);
1452         *space_info = found;
1453         return 0;
1454 }
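
/*
 * update_space_info() is a find-or-create accumulator: the first call
 * for a given flags value allocates the btrfs_space_info, later calls
 * only add to its totals.  A minimal usage sketch (the sizes are made
 * up for illustration):
 */
#if 0
struct btrfs_space_info *sinfo;

/* first 1GB metadata group: allocates the space_info */
update_space_info(info, BTRFS_BLOCK_GROUP_METADATA, 1ULL << 30, 0, &sinfo);
/* second group with the same flags: total_bytes grows to 2GB */
update_space_info(info, BTRFS_BLOCK_GROUP_METADATA, 1ULL << 30, 0, &sinfo);
#endif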
1455
1456 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1457 {
1458         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1459                                    BTRFS_BLOCK_GROUP_RAID1 |
1460                                    BTRFS_BLOCK_GROUP_RAID10 |
1461                                    BTRFS_BLOCK_GROUP_DUP);
1462         if (extra_flags) {
1463                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1464                         fs_info->avail_data_alloc_bits |= extra_flags;
1465                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1466                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1467                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1468                         fs_info->avail_system_alloc_bits |= extra_flags;
1469         }
1470 }
1471
1472 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1473 {
1474         u64 num_devices = root->fs_info->fs_devices->num_devices;
1475
1476         if (num_devices == 1)
1477                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1478         if (num_devices < 4)
1479                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1480
1481         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1482             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1483                       BTRFS_BLOCK_GROUP_RAID10))) {
1484                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1485         }
1486
1487         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1488             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1489                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1490         }
1491
1492         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1493             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1494              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1495              (flags & BTRFS_BLOCK_GROUP_DUP)))
1496                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1497         return flags;
1498 }
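
/*
 * A worked example of the reductions above: on a single-device
 * filesystem a request for (RAID1 | RAID10 | DUP) loses RAID1
 * (num_devices == 1) and RAID10 (num_devices < 4), leaving DUP, the
 * only mirroring profile one device can honour.  With two devices,
 * RAID10 is still dropped, and the DUP-versus-RAID1 rule then clears
 * DUP, so the allocator ends up with plain RAID1.
 */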
1499
1500 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1501                           struct btrfs_root *extent_root, u64 alloc_bytes,
1502                           u64 flags, int force)
1503 {
1504         struct btrfs_space_info *space_info;
1505         u64 thresh;
1506         u64 start;
1507         u64 num_bytes;
1508         int ret = 0;
1509
1510         flags = reduce_alloc_profile(extent_root, flags);
1511
1512         space_info = __find_space_info(extent_root->fs_info, flags);
1513         if (!space_info) {
1514                 ret = update_space_info(extent_root->fs_info, flags,
1515                                         0, 0, &space_info);
1516                 BUG_ON(ret);
1517         }
1518         BUG_ON(!space_info);
1519
1520         if (space_info->force_alloc) {
1521                 force = 1;
1522                 space_info->force_alloc = 0;
1523         }
1524         if (space_info->full)
1525                 goto out;
1526
1527         thresh = div_factor(space_info->total_bytes, 6);
1528         if (!force &&
1529            (space_info->bytes_used + space_info->bytes_pinned +
1530             space_info->bytes_reserved + alloc_bytes) < thresh)
1531                 goto out;
1532
1533         mutex_lock(&extent_root->fs_info->chunk_mutex);
1534         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1535         if (ret == -ENOSPC) {
1536                 printk(KERN_INFO "space info full %Lu\n", flags);
1537                 space_info->full = 1;
1538                 goto out_unlock;
1539         }
1540         BUG_ON(ret);
1541
1542         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1543                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1544         BUG_ON(ret);
1545
1546 out_unlock:
1547         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1548 out:
1549         return ret;
1550 }
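
/*
 * div_factor(x, 6) is x * 6 / 10, so the threshold in do_chunk_alloc()
 * is 60% of the space_info's total bytes.  For example, with 10GB of
 * metadata space, 5GB accounted as used + pinned + reserved and a
 * 256MB request, 5.25GB < 6GB and no chunk is allocated unless force
 * is set; once accounted usage crosses 6GB, the next allocation calls
 * btrfs_alloc_chunk().
 */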
1551
1552 static int update_block_group(struct btrfs_trans_handle *trans,
1553                               struct btrfs_root *root,
1554                               u64 bytenr, u64 num_bytes, int alloc,
1555                               int mark_free)
1556 {
1557         struct btrfs_block_group_cache *cache;
1558         struct btrfs_fs_info *info = root->fs_info;
1559         u64 total = num_bytes;
1560         u64 old_val;
1561         u64 byte_in_group;
1562
1563         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1564         while (total) {
1565                 cache = btrfs_lookup_block_group(info, bytenr);
1566                 if (!cache) {
1567                         return -1;
1568                 }
1569                 byte_in_group = bytenr - cache->key.objectid;
1570                 WARN_ON(byte_in_group > cache->key.offset);
1571
1572                 spin_lock(&cache->lock);
1573                 cache->dirty = 1;
1574                 old_val = btrfs_block_group_used(&cache->item);
1575                 num_bytes = min(total, cache->key.offset - byte_in_group);
1576                 if (alloc) {
1577                         old_val += num_bytes;
1578                         cache->space_info->bytes_used += num_bytes;
1579                         btrfs_set_block_group_used(&cache->item, old_val);
1580                         spin_unlock(&cache->lock);
1581                 } else {
1582                         old_val -= num_bytes;
1583                         cache->space_info->bytes_used -= num_bytes;
1584                         btrfs_set_block_group_used(&cache->item, old_val);
1585                         spin_unlock(&cache->lock);
1586                         if (mark_free) {
1587                                 int ret;
1588                                 ret = btrfs_add_free_space(cache, bytenr,
1589                                                            num_bytes);
1590                                 if (ret)
1591                                         return -1;
1592                         }
1593                 }
1594                 total -= num_bytes;
1595                 bytenr += num_bytes;
1596         }
1597         return 0;
1598 }
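
/*
 * The while (total) loop above walks an arbitrary byte range across
 * block group boundaries, clamping each step with min().  The same
 * splitting idiom in isolation, assuming equally sized groups purely
 * for the sake of the example:
 */
#if 0
static void for_each_group_chunk(u64 start, u64 len, u64 group_size)
{
        while (len) {
                u64 step = min(len, group_size - (start % group_size));

                /* account 'step' bytes against the group holding 'start' */
                start += step;
                len -= step;
        }
}
#endif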
1599
1600 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1601 {
1602         struct btrfs_block_group_cache *cache;
1603
1604         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
1605         if (!cache)
1606                 return 0;
1607
1608         return cache->key.objectid;
1609 }
1610
1611 int btrfs_update_pinned_extents(struct btrfs_root *root,
1612                                 u64 bytenr, u64 num, int pin)
1613 {
1614         u64 len;
1615         struct btrfs_block_group_cache *cache;
1616         struct btrfs_fs_info *fs_info = root->fs_info;
1617
1618         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1619         if (pin) {
1620                 set_extent_dirty(&fs_info->pinned_extents,
1621                                 bytenr, bytenr + num - 1, GFP_NOFS);
1622         } else {
1623                 clear_extent_dirty(&fs_info->pinned_extents,
1624                                 bytenr, bytenr + num - 1, GFP_NOFS);
1625         }
1626         while (num > 0) {
1627                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1628                 BUG_ON(!cache);
1629                 len = min(num, cache->key.offset -
1630                           (bytenr - cache->key.objectid));
1631                 if (pin) {
1632                         spin_lock(&cache->lock);
1633                         cache->pinned += len;
1634                         cache->space_info->bytes_pinned += len;
1635                         spin_unlock(&cache->lock);
1636                         fs_info->total_pinned += len;
1637                 } else {
1638                         spin_lock(&cache->lock);
1639                         cache->pinned -= len;
1640                         cache->space_info->bytes_pinned -= len;
1641                         spin_unlock(&cache->lock);
1642                         fs_info->total_pinned -= len;
1643                 }
1644                 bytenr += len;
1645                 num -= len;
1646         }
1647         return 0;
1648 }
1649
1650 static int update_reserved_extents(struct btrfs_root *root,
1651                                    u64 bytenr, u64 num, int reserve)
1652 {
1653         u64 len;
1654         struct btrfs_block_group_cache *cache;
1655         struct btrfs_fs_info *fs_info = root->fs_info;
1656
1657         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1658         while (num > 0) {
1659                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1660                 BUG_ON(!cache);
1661                 len = min(num, cache->key.offset -
1662                           (bytenr - cache->key.objectid));
1663                 if (reserve) {
1664                         spin_lock(&cache->lock);
1665                         cache->reserved += len;
1666                         cache->space_info->bytes_reserved += len;
1667                         spin_unlock(&cache->lock);
1668                 } else {
1669                         spin_lock(&cache->lock);
1670                         cache->reserved -= len;
1671                         cache->space_info->bytes_reserved -= len;
1672                         spin_unlock(&cache->lock);
1673                 }
1674                 bytenr += len;
1675                 num -= len;
1676         }
1677         return 0;
1678 }
1679
1680 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1681 {
1682         u64 last = 0;
1683         u64 start;
1684         u64 end;
1685         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1686         int ret;
1687
1688         while (1) {
1689                 ret = find_first_extent_bit(pinned_extents, last,
1690                                             &start, &end, EXTENT_DIRTY);
1691                 if (ret)
1692                         break;
1693                 set_extent_dirty(copy, start, end, GFP_NOFS);
1694                 last = end + 1;
1695         }
1696         return 0;
1697 }
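
/*
 * find_first_extent_bit() returns 0 and fills [start, end] with the
 * next range at or after the search offset that has the given bits
 * set, so advancing the cursor with end + 1, as btrfs_copy_pinned()
 * does, visits every range exactly once.  The idiom in isolation:
 */
#if 0
static void walk_dirty_ranges(struct extent_io_tree *tree)
{
        u64 cur = 0;
        u64 start;
        u64 end;

        while (!find_first_extent_bit(tree, cur, &start, &end,
                                      EXTENT_DIRTY)) {
                /* process [start, end] */
                cur = end + 1;          /* resume just past this range */
        }
}
#endif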
1698
1699 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1700                                struct btrfs_root *root,
1701                                struct extent_io_tree *unpin)
1702 {
1703         u64 start;
1704         u64 end;
1705         int ret;
1706         struct btrfs_block_group_cache *cache;
1707
1708         mutex_lock(&root->fs_info->alloc_mutex);
1709         while (1) {
1710                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1711                                             EXTENT_DIRTY);
1712                 if (ret)
1713                         break;
1714                 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
1715                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1716                 cache = btrfs_lookup_block_group(root->fs_info, start);
1717                 if (cache->cached)
1718                         btrfs_add_free_space(cache, start, end - start + 1);
1719                 if (need_resched()) {
1720                         mutex_unlock(&root->fs_info->alloc_mutex);
1721                         cond_resched();
1722                         mutex_lock(&root->fs_info->alloc_mutex);
1723                 }
1724         }
1725         mutex_unlock(&root->fs_info->alloc_mutex);
1726         return 0;
1727 }
1728
1729 static int finish_current_insert(struct btrfs_trans_handle *trans,
1730                                  struct btrfs_root *extent_root)
1731 {
1732         u64 start;
1733         u64 end;
1734         u64 priv;
1735         struct btrfs_fs_info *info = extent_root->fs_info;
1736         struct btrfs_path *path;
1737         struct btrfs_extent_ref *ref;
1738         struct pending_extent_op *extent_op;
1739         struct btrfs_key key;
1740         struct btrfs_extent_item extent_item;
1741         int ret;
1742         int err = 0;
1743
1744         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1745         btrfs_set_stack_extent_refs(&extent_item, 1);
1746         path = btrfs_alloc_path();
             BUG_ON(!path);
1747
1748         while (1) {
1749                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1750                                             &end, EXTENT_LOCKED);
1751                 if (ret)
1752                         break;
1753
1754                 ret = get_state_private(&info->extent_ins, start, &priv);
1755                 BUG_ON(ret);
1756                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
1757
1758                 if (extent_op->type == PENDING_EXTENT_INSERT) {
1759                         key.objectid = start;
1760                         key.offset = end + 1 - start;
1761                         key.type = BTRFS_EXTENT_ITEM_KEY;
1762                         err = btrfs_insert_item(trans, extent_root, &key,
1763                                         &extent_item, sizeof(extent_item));
1764                         BUG_ON(err);
1765
1766                         clear_extent_bits(&info->extent_ins, start, end,
1767                                           EXTENT_LOCKED, GFP_NOFS);
1768
1769                         err = insert_extent_backref(trans, extent_root, path,
1770                                                 start, extent_op->parent,
1771                                                 extent_root->root_key.objectid,
1772                                                 extent_op->generation,
1773                                                 extent_op->level, 0);
1774                         BUG_ON(err);
1775                 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
1776                         err = lookup_extent_backref(trans, extent_root, path,
1777                                                 start, extent_op->orig_parent,
1778                                                 extent_root->root_key.objectid,
1779                                                 extent_op->orig_generation, 0);
1780                         BUG_ON(err);
1781
1782                         clear_extent_bits(&info->extent_ins, start, end,
1783                                           EXTENT_LOCKED, GFP_NOFS);
1784
1785                         key.objectid = start;
1786                         key.offset = extent_op->parent;
1787                         key.type = BTRFS_EXTENT_REF_KEY;
1788                         err = btrfs_set_item_key_safe(trans, extent_root, path,
1789                                                       &key);
1790                         BUG_ON(err);
1791                         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1792                                              struct btrfs_extent_ref);
1793                         btrfs_set_ref_generation(path->nodes[0], ref,
1794                                                  extent_op->generation);
1795                         btrfs_mark_buffer_dirty(path->nodes[0]);
1796                         btrfs_release_path(extent_root, path);
1797                 } else {
1798                         BUG_ON(1);
1799                 }
1800                 kfree(extent_op);
1801
1802                 if (need_resched()) {
1803                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
1804                         cond_resched();
1805                         mutex_lock(&extent_root->fs_info->alloc_mutex);
1806                 }
1807         }
1808         btrfs_free_path(path);
1809         return 0;
1810 }
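
/*
 * Pending operations ride along in the extent_ins tree by storing the
 * pending_extent_op pointer as the range's private value, which is an
 * unsigned long, hence the double cast when finish_current_insert()
 * reads it back.  The store side (see __btrfs_alloc_reserved_extent()
 * below) mirrors this sketch:
 */
#if 0
/* producer: mark the range and stash the op */
set_extent_bits(tree, start, end, EXTENT_LOCKED, GFP_NOFS);
set_state_private(tree, start, (unsigned long)extent_op);

/* consumer: recover the op from the private value */
get_state_private(tree, start, &priv);
extent_op = (struct pending_extent_op *)(unsigned long)priv;
#endif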
1811
1812 static int pin_down_bytes(struct btrfs_trans_handle *trans,
1813                           struct btrfs_root *root,
1814                           u64 bytenr, u64 num_bytes, int is_data)
1815 {
1816         int err = 0;
1817         struct extent_buffer *buf;
1818
1819         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1820         if (is_data)
1821                 goto pinit;
1822
1823         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1824         if (!buf)
1825                 goto pinit;
1826
1827         /* we can reuse a block if it hasn't been written
1828          * and it is from this transaction.  We can't
1829          * reuse anything from the tree log root because
1830          * it has tiny sub-transactions.
1831          */
1832         if (btrfs_buffer_uptodate(buf, 0) &&
1833             btrfs_try_tree_lock(buf)) {
1834                 u64 header_owner = btrfs_header_owner(buf);
1835                 u64 header_transid = btrfs_header_generation(buf);
1836                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
1837                     header_transid == trans->transid &&
1838                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
1839                         clean_tree_block(NULL, root, buf);
1840                         btrfs_tree_unlock(buf);
1841                         free_extent_buffer(buf);
1842                         return 1;
1843                 }
1844                 btrfs_tree_unlock(buf);
1845         }
1846         free_extent_buffer(buf);
1847 pinit:
1848         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
1849
1850         BUG_ON(err < 0);
1851         return 0;
1852 }
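
/*
 * The fast path in pin_down_bytes() lets a cow'd block be recycled
 * immediately: if the buffer was allocated in this transaction, never
 * written, and does not belong to the tree log, nothing on disk can
 * reference it, so it is cleaned and reported reusable (return 1)
 * instead of being pinned until the transaction commits (return 0).
 */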
1853
1854 /*
1855  * remove an extent from the root, returns 0 on success
1856  */
1857 static int __free_extent(struct btrfs_trans_handle *trans,
1858                          struct btrfs_root *root,
1859                          u64 bytenr, u64 num_bytes, u64 parent,
1860                          u64 root_objectid, u64 ref_generation,
1861                          u64 owner_objectid, u64 owner_offset,
1862                          int pin, int mark_free)
1863 {
1864         struct btrfs_path *path;
1865         struct btrfs_key key;
1866         struct btrfs_fs_info *info = root->fs_info;
1867         struct btrfs_root *extent_root = info->extent_root;
1868         struct extent_buffer *leaf;
1869         int ret;
1870         int extent_slot = 0;
1871         int found_extent = 0;
1872         int num_to_del = 1;
1873         struct btrfs_extent_item *ei;
1874         u32 refs;
1875
1876         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1877         key.objectid = bytenr;
1878         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1879         key.offset = num_bytes;
1880         path = btrfs_alloc_path();
1881         if (!path)
1882                 return -ENOMEM;
1883
1884         path->reada = 1;
1885         ret = lookup_extent_backref(trans, extent_root, path, bytenr, parent,
1886                                     root_objectid, ref_generation, 1);
1887         if (ret == 0) {
1888                 struct btrfs_key found_key;
1889                 extent_slot = path->slots[0];
1890                 while (extent_slot > 0) {
1891                         extent_slot--;
1892                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1893                                               extent_slot);
1894                         if (found_key.objectid != bytenr)
1895                                 break;
1896                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1897                             found_key.offset == num_bytes) {
1898                                 found_extent = 1;
1899                                 break;
1900                         }
1901                         if (path->slots[0] - extent_slot > 5)
1902                                 break;
1903                 }
1904                 if (!found_extent) {
1905                         ret = remove_extent_backref(trans, extent_root, path);
1906                         BUG_ON(ret);
1907                         btrfs_release_path(extent_root, path);
1908                         ret = btrfs_search_slot(trans, extent_root,
1909                                                 &key, path, -1, 1);
1910                         BUG_ON(ret);
1911                         extent_slot = path->slots[0];
1912                 }
1913         } else {
1914                 btrfs_print_leaf(extent_root, path->nodes[0]);
1915                 WARN_ON(1);
1916                 printk(KERN_ERR "Unable to find ref byte nr %Lu root %Lu "
1917                        "gen %Lu owner %Lu offset %Lu\n", bytenr,
1918                        root_objectid, ref_generation, owner_objectid,
1919                        owner_offset);
1920         }
1921
1922         leaf = path->nodes[0];
1923         ei = btrfs_item_ptr(leaf, extent_slot,
1924                             struct btrfs_extent_item);
1925         refs = btrfs_extent_refs(leaf, ei);
1926         BUG_ON(refs == 0);
1927         refs -= 1;
1928         btrfs_set_extent_refs(leaf, ei, refs);
1929
1930         btrfs_mark_buffer_dirty(leaf);
1931
1932         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1933                 struct btrfs_extent_ref *ref;
1934                 ref = btrfs_item_ptr(leaf, path->slots[0],
1935                                      struct btrfs_extent_ref);
1936                 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
1937                 /* if the back ref and the extent are next to each other
1938                  * they get deleted below in one shot
1939                  */
1940                 path->slots[0] = extent_slot;
1941                 num_to_del = 2;
1942         } else if (found_extent) {
1943                 /* otherwise delete the extent back ref */
1944                 ret = remove_extent_backref(trans, extent_root, path);
1945                 BUG_ON(ret);
1946                 /* if refs are 0, we need to setup the path for deletion */
1947                 if (refs == 0) {
1948                         btrfs_release_path(extent_root, path);
1949                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1950                                                 -1, 1);
1951                         BUG_ON(ret);
1952                 }
1953         }
1954
1955         if (refs == 0) {
1956                 u64 super_used;
1957                 u64 root_used;
1958 #ifdef BIO_RW_DISCARD
1959                 u64 map_length = num_bytes;
1960                 struct btrfs_multi_bio *multi = NULL;
1961 #endif
1962
1963                 if (pin) {
1964                         ret = pin_down_bytes(trans, root, bytenr, num_bytes,
1965                                 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
1966                         if (ret > 0)
1967                                 mark_free = 1;
1968                         BUG_ON(ret < 0);
1969                 }
1970
1971                 /* block accounting for super block */
1972                 spin_lock_irq(&info->delalloc_lock);
1973                 super_used = btrfs_super_bytes_used(&info->super_copy);
1974                 btrfs_set_super_bytes_used(&info->super_copy,
1975                                            super_used - num_bytes);
1976                 spin_unlock_irq(&info->delalloc_lock);
1977
1978                 /* block accounting for root item */
1979                 root_used = btrfs_root_used(&root->root_item);
1980                 btrfs_set_root_used(&root->root_item,
1981                                            root_used - num_bytes);
1982                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1983                                       num_to_del);
1984                 BUG_ON(ret);
1985                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1986                                          mark_free);
1987                 BUG_ON(ret);
1988
1989 #ifdef BIO_RW_DISCARD
1990                 /* Tell the block device(s) that the sectors can be discarded */
1991                 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
1992                                       bytenr, &map_length, &multi, 0);
1993                 if (!ret) {
1994                         struct btrfs_bio_stripe *stripe = multi->stripes;
1995                         int i;
1996
1997                         if (map_length > num_bytes)
1998                                 map_length = num_bytes;
1999
2000                         for (i = 0; i < multi->num_stripes; i++, stripe++) {
2001                                 blkdev_issue_discard(stripe->dev->bdev,
2002                                                      stripe->physical >> 9,
2003                                                      map_length >> 9);
2004                         }
2005                         kfree(multi);
2006                 }
2007 #endif
2008         }
2009         btrfs_free_path(path);
2010         finish_current_insert(trans, extent_root);
2011         return ret;
2012 }
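
/*
 * blkdev_issue_discard() works in 512-byte sectors, hence the >> 9
 * shifts above: a 128K extent becomes a discard of 256 sectors, with
 * the start sector taken from each stripe's physical byte offset on
 * its device, and map_length clamped so the discard never runs past
 * the freed extent.
 */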
2013
2014 /*
2015  * find all the blocks marked as pending in the radix tree and remove
2016  * them from the extent map
2017  */
2018 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
2019                                btrfs_root *extent_root)
2020 {
2021         int ret;
2022         int err = 0;
2023         int mark_free = 0;
2024         u64 start;
2025         u64 end;
2026         u64 priv;
2027         struct extent_io_tree *pending_del;
2028         struct extent_io_tree *extent_ins;
2029         struct pending_extent_op *extent_op;
2030
2031         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2032         extent_ins = &extent_root->fs_info->extent_ins;
2033         pending_del = &extent_root->fs_info->pending_del;
2034
2035         while (1) {
2036                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
2037                                             EXTENT_LOCKED);
2038                 if (ret)
2039                         break;
2040
2041                 ret = get_state_private(pending_del, start, &priv);
2042                 BUG_ON(ret);
2043                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2044
2045                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
2046                                   GFP_NOFS);
2047
2048                 ret = pin_down_bytes(trans, extent_root, start,
2049                                      end + 1 - start, 0);
2050                 mark_free = ret > 0;
2051                 if (!test_range_bit(extent_ins, start, end,
2052                                     EXTENT_LOCKED, 0)) {
2053 free_extent:
2054                         ret = __free_extent(trans, extent_root,
2055                                             start, end + 1 - start,
2056                                             extent_op->orig_parent,
2057                                             extent_root->root_key.objectid,
2058                                             extent_op->orig_generation,
2059                                             extent_op->level, 0, 0, mark_free);
2060                         kfree(extent_op);
2061                 } else {
2062                         kfree(extent_op);
2063                         ret = get_state_private(extent_ins, start, &priv);
2064                         BUG_ON(ret);
2065                         extent_op = (struct pending_extent_op *)
2066                                                         (unsigned long)priv;
2067
2068                         clear_extent_bits(extent_ins, start, end,
2069                                           EXTENT_LOCKED, GFP_NOFS);
2070
2071                         if (extent_op->type == PENDING_BACKREF_UPDATE)
2072                                 goto free_extent;
2073
2074                         ret = update_block_group(trans, extent_root, start,
2075                                                 end + 1 - start, 0, mark_free);
2076                         BUG_ON(ret);
2077                         kfree(extent_op);
2078                 }
2079                 if (ret)
2080                         err = ret;
2081
2082                 if (need_resched()) {
2083                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2084                         cond_resched();
2085                         mutex_lock(&extent_root->fs_info->alloc_mutex);
2086                 }
2087         }
2088         return err;
2089 }
2090
2091 /*
2092  * remove an extent from the root, returns 0 on success
2093  */
2094 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2095                                struct btrfs_root *root,
2096                                u64 bytenr, u64 num_bytes, u64 parent,
2097                                u64 root_objectid, u64 ref_generation,
2098                                u64 owner_objectid, u64 owner_offset, int pin)
2099 {
2100         struct btrfs_root *extent_root = root->fs_info->extent_root;
2101         int pending_ret;
2102         int ret;
2103
2104         WARN_ON(num_bytes < root->sectorsize);
2105         if (root == extent_root) {
2106                 struct pending_extent_op *extent_op;
2107
2108                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2109                 BUG_ON(!extent_op);
2110
2111                 extent_op->type = PENDING_EXTENT_DELETE;
2112                 extent_op->bytenr = bytenr;
2113                 extent_op->num_bytes = num_bytes;
2114                 extent_op->parent = parent;
2115                 extent_op->orig_parent = parent;
2116                 extent_op->generation = ref_generation;
2117                 extent_op->orig_generation = ref_generation;
2118                 extent_op->level = (int)owner_objectid;
2119
2120                 set_extent_bits(&root->fs_info->pending_del,
2121                                 bytenr, bytenr + num_bytes - 1,
2122                                 EXTENT_LOCKED, GFP_NOFS);
2123                 set_state_private(&root->fs_info->pending_del,
2124                                   bytenr, (unsigned long)extent_op);
2125                 return 0;
2126         }
2127         /* metadata extents are always pinned */
2128         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2129                 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2130                         struct btrfs_block_group_cache *cache;
2131
2132                         /* btrfs_free_reserved_extent */
2133                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
2134                         BUG_ON(!cache);
2135                         btrfs_add_free_space(cache, bytenr, num_bytes);
2136                         update_reserved_extents(root, bytenr, num_bytes, 0);
2137                         return 0;
2138                 }
2139                 pin = 1;
2140         }
2141
2142         /* for data, pin if an earlier transaction may have committed this extent */
2143         if (ref_generation != trans->transid)
2144                 pin = 1;
2145
2146         ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2147                             root_objectid, ref_generation, owner_objectid,
2148                             owner_offset, pin, pin == 0);
2149
2150         finish_current_insert(trans, root->fs_info->extent_root);
2151         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
2152         return ret ? ret : pending_ret;
2153 }
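
/*
 * Note the special case at the top of __btrfs_free_extent(): a free
 * issued while the extent tree itself is being modified
 * (root == extent_root) must not recurse into another extent tree
 * update, so the delete is queued as a PENDING_EXTENT_DELETE in
 * fs_info->pending_del and replayed by del_pending_extents() once the
 * current operation has finished.
 */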
2154
2155 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2156                       struct btrfs_root *root,
2157                       u64 bytenr, u64 num_bytes, u64 parent,
2158                       u64 root_objectid, u64 ref_generation,
2159                       u64 owner_objectid, u64 owner_offset, int pin)
2160 {
2161         int ret;
2162
2163         maybe_lock_mutex(root);
2164         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
2165                                   root_objectid, ref_generation,
2166                                   owner_objectid, owner_offset, pin);
2167         maybe_unlock_mutex(root);
2168         return ret;
2169 }
2170
2171 static u64 stripe_align(struct btrfs_root *root, u64 val)
2172 {
2173         u64 mask = ((u64)root->stripesize - 1);
2174         u64 ret = (val + mask) & ~mask;
2175         return ret;
2176 }
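
/*
 * stripe_align() rounds val up to the next multiple of stripesize,
 * which works because stripesize is a power of two.  A worked example
 * with a 64K stripe: mask is 0xffff, so (65537 + 0xffff) & ~0xffff
 * gives 131072, while an already aligned value such as 131072 passes
 * through unchanged.
 */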
2177
2178 /*
2179  * walks the btree of allocated extents and finds a hole of a given size.
2180  * The key ins is changed to record the hole:
2181  * ins->objectid == block start
2182  * ins->flags == BTRFS_EXTENT_ITEM_KEY
2183  * ins->offset == number of blocks
2184  * Any available blocks before search_start are skipped.
2185  */
2186 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2187                                      struct btrfs_root *orig_root,
2188                                      u64 num_bytes, u64 empty_size,
2189                                      u64 search_start, u64 search_end,
2190                                      u64 hint_byte, struct btrfs_key *ins,
2191                                      u64 exclude_start, u64 exclude_nr,
2192                                      int data)
2193 {
2194         int ret;
2195         u64 orig_search_start;
2196         struct btrfs_root *root = orig_root->fs_info->extent_root;
2197         struct btrfs_fs_info *info = root->fs_info;
2198         u64 total_needed = num_bytes;
2199         u64 *last_ptr = NULL;
2200         struct btrfs_block_group_cache *block_group;
2201         int chunk_alloc_done = 0;
2202         int empty_cluster = 2 * 1024 * 1024;
2203         int allowed_chunk_alloc = 0;
2204
2205         WARN_ON(num_bytes < root->sectorsize);
2206         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2207
2208         if (orig_root->ref_cows || empty_size)
2209                 allowed_chunk_alloc = 1;
2210
2211         if (data & BTRFS_BLOCK_GROUP_METADATA) {
2212                 last_ptr = &root->fs_info->last_alloc;
2213                 empty_cluster = 256 * 1024;
2214         }
2215
2216         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
2217                 last_ptr = &root->fs_info->last_data_alloc;
2218
2219         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2220                 last_ptr = &root->fs_info->last_log_alloc;
                     /* seed a cold log-tree hint from the last regular allocation */
2221                 if (*last_ptr == 0 && root->fs_info->last_alloc) {
2222                         *last_ptr = root->fs_info->last_alloc + empty_cluster;
2223                 }
2224         }
2225
2226         if (last_ptr) {
2227                 if (*last_ptr)
2228                         hint_byte = *last_ptr;
2229                 else
2230                         empty_size += empty_cluster;
2231         }
2232
2233         search_start = max(search_start, first_logical_byte(root, 0));
2234         orig_search_start = search_start;
2235
2236         search_start = max(search_start, hint_byte);
2237         total_needed += empty_size;
2238
2239 new_group:
2240         block_group = btrfs_lookup_first_block_group(info, search_start);
2241
2242         /*
2243          * Ok, this looks a little tricky, but it's really simple.  First, if
2244          * we didn't find a block group, obviously we want to start over.
2245          * Secondly, if the block group we found does not match the type we
2246          * need and we have a last_ptr that is not 0, chances are the last
2247          * allocation we made was at the end of the block group, so let's go
2248          * ahead and skip looking through the rest of the block groups and
2249          * start at the beginning.  This helps with metadata allocations,
2250          * since you are likely to have a bunch of data block groups to
2251          * search through before you realize that you need to start over, so
2252          * start over right away and save the time.
2253          */
2254         if (!block_group || (!block_group_bits(block_group, data) &&
2255                              last_ptr && *last_ptr)) {
2256                 if (search_start != orig_search_start) {
2257                         if (last_ptr && *last_ptr)
2258                                 *last_ptr = 0;
2259                         search_start = orig_search_start;
2260                         goto new_group;
2261                 } else if (!chunk_alloc_done && allowed_chunk_alloc) {
2262                         ret = do_chunk_alloc(trans, root,
2263                                              num_bytes + 2 * 1024 * 1024,
2264                                              data, 1);
2265                         if (ret < 0)
2266                                 goto error;
2267                         BUG_ON(ret);
2268                         chunk_alloc_done = 1;
2269                         search_start = orig_search_start;
2270                         goto new_group;
2271                 } else {
2272                         ret = -ENOSPC;
2273                         goto error;
2274                 }
2275         }
2276
2277         /*
2278          * this is going to search through all of the existing block groups it
2279          * can find, so if we don't find something we need to see if we can
2280          * allocate what we need.
2281          */
2282         ret = find_free_space(root, &block_group, &search_start,
2283                               total_needed, data);
2284         if (ret == -ENOSPC) {
2285                 /*
2286                  * instead of allocating, start at the original search start
2287                  * and see if there is something to be found, if not then we
2288                  * allocate
2289                  */
2290                 if (search_start != orig_search_start) {
2291                         if (last_ptr && *last_ptr) {
2292                                 *last_ptr = 0;
2293                                 total_needed += empty_cluster;
2294                         }
2295                         search_start = orig_search_start;
2296                         goto new_group;
2297                 }
2298
2299                 /*
2300                  * we've already allocated, we're pretty screwed
2301                  */
2302                 if (chunk_alloc_done) {
2303                         goto error;
2304                 } else if (!allowed_chunk_alloc && block_group &&
2305                            block_group_bits(block_group, data)) {
2306                         block_group->space_info->force_alloc = 1;
2307                         goto error;
2308                 } else if (!allowed_chunk_alloc) {
2309                         goto error;
2310                 }
2311
2312                 ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024,
2313                                      data, 1);
2314                 if (ret < 0)
2315                         goto error;
2316
2317                 BUG_ON(ret);
2318                 chunk_alloc_done = 1;
2319                 if (block_group)
2320                         search_start = block_group->key.objectid +
2321                                 block_group->key.offset;
2322                 else
2323                         search_start = orig_search_start;
2324                 goto new_group;
2325         }
2326
2327         if (ret)
2328                 goto error;
2329
2330         search_start = stripe_align(root, search_start);
2331         ins->objectid = search_start;
2332         ins->offset = num_bytes;
2333
2334         if (ins->objectid + num_bytes >= search_end) {
2335                 search_start = orig_search_start;
2336                 if (chunk_alloc_done) {
2337                         ret = -ENOSPC;
2338                         goto error;
2339                 }
2340                 goto new_group;
2341         }
2342
2343         if (ins->objectid + num_bytes >
2344             block_group->key.objectid + block_group->key.offset) {
2345                 if (search_start == orig_search_start && chunk_alloc_done) {
2346                         ret = -ENOSPC;
2347                         goto error;
2348                 }
2349                 search_start = block_group->key.objectid +
2350                         block_group->key.offset;
2351                 goto new_group;
2352         }
2353
2354         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
2355             ins->objectid < exclude_start + exclude_nr)) {
2356                 search_start = exclude_start + exclude_nr;
2357                 goto new_group;
2358         }
2359
2360         if (!(data & BTRFS_BLOCK_GROUP_DATA))
2361                 trans->block_group = block_group;
2362
2363         ins->offset = num_bytes;
2364         if (last_ptr) {
2365                 *last_ptr = ins->objectid + ins->offset;
2366                 if (*last_ptr ==
2367                     btrfs_super_total_bytes(&root->fs_info->super_copy))
2368                         *last_ptr = 0;
2369         }
2370
2371         ret = 0;
2372 error:
2373         return ret;
2374 }
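
/*
 * On success find_free_extent() stores the end of the allocation in
 * *last_ptr, so the next allocation of the same kind starts where the
 * previous one ended and related allocations cluster together.  A cold
 * hint instead pads the request with empty_cluster, carving out a run
 * that later allocations can continue from.
 */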
2375
2376 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2377 {
2378         struct btrfs_block_group_cache *cache;
2379         struct list_head *l;
2380
2381         printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
2382                info->total_bytes - info->bytes_used - info->bytes_pinned -
2383                info->bytes_reserved, (info->full) ? "" : "not ");
2384
2385         spin_lock(&info->lock);
2386         list_for_each(l, &info->block_groups) {
2387                 cache = list_entry(l, struct btrfs_block_group_cache, list);
2388                 spin_lock(&cache->lock);
2389                 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
2390                        "%Lu pinned %Lu reserved\n",
2391                        cache->key.objectid, cache->key.offset,
2392                        btrfs_block_group_used(&cache->item),
2393                        cache->pinned, cache->reserved);
2394                 btrfs_dump_free_space(cache, bytes);
2395                 spin_unlock(&cache->lock);
2396         }
2397         spin_unlock(&info->lock);
2398 }
2399
2400 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2401                                   struct btrfs_root *root,
2402                                   u64 num_bytes, u64 min_alloc_size,
2403                                   u64 empty_size, u64 hint_byte,
2404                                   u64 search_end, struct btrfs_key *ins,
2405                                   u64 data)
2406 {
2407         int ret;
2408         u64 search_start = 0;
2409         u64 alloc_profile;
2410         struct btrfs_fs_info *info = root->fs_info;
2411         struct btrfs_block_group_cache *cache;
2412
2413         if (data) {
2414                 alloc_profile = info->avail_data_alloc_bits &
2415                                 info->data_alloc_profile;
2416                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2417         } else if (root == root->fs_info->chunk_root) {
2418                 alloc_profile = info->avail_system_alloc_bits &
2419                                 info->system_alloc_profile;
2420                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2421         } else {
2422                 alloc_profile = info->avail_metadata_alloc_bits &
2423                                 info->metadata_alloc_profile;
2424                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2425         }
2426 again:
2427         data = reduce_alloc_profile(root, data);
2428         /*
2429          * the only place that sets empty_size is btrfs_realloc_node, which
2430          * is not called recursively on allocations
2431          */
2432         if (empty_size || root->ref_cows) {
2433                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
2434                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2435                                      2 * 1024 * 1024,
2436                                      BTRFS_BLOCK_GROUP_METADATA |
2437                                      (info->metadata_alloc_profile &
2438                                       info->avail_metadata_alloc_bits), 0);
2439                 }
2440                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2441                                      num_bytes + 2 * 1024 * 1024, data, 0);
2442         }
2443
2444         WARN_ON(num_bytes < root->sectorsize);
2445         ret = find_free_extent(trans, root, num_bytes, empty_size,
2446                                search_start, search_end, hint_byte, ins,
2447                                trans->alloc_exclude_start,
2448                                trans->alloc_exclude_nr, data);
2449
2450         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
2451                 num_bytes = num_bytes >> 1;
2452                 num_bytes = num_bytes & ~(root->sectorsize - 1);
2453                 num_bytes = max(num_bytes, min_alloc_size);
2454                 do_chunk_alloc(trans, root->fs_info->extent_root,
2455                                num_bytes, data, 1);
2456                 goto again;
2457         }
2458         if (ret) {
2459                 struct btrfs_space_info *sinfo;
2460
2461                 sinfo = __find_space_info(root->fs_info, data);
2462                 printk(KERN_ERR "allocation failed flags %Lu, wanted %Lu\n",
2463                        data, num_bytes);
2464                 dump_space_info(sinfo, num_bytes);
2465                 BUG();
2466         }
2467         cache = btrfs_lookup_block_group(root->fs_info, ins->objectid);
2468         if (!cache) {
2469                 printk(KERN_ERR "Unable to find block group for %Lu\n", ins->objectid);
2470                 return -ENOSPC;
2471         }
2472
2473         ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset);
2474
2475         return ret;
2476 }
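
/*
 * The -ENOSPC retry in __btrfs_reserve_extent() halves the request,
 * rounds it down to a sector boundary and clamps it to min_alloc_size:
 * a failed 1MB request on 4K sectors retries with 512K, then 256K,
 * and so on.  Once num_bytes has shrunk to min_alloc_size the loop is
 * not retried, and a further failure dumps the space_info and BUGs.
 */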
2477
2478 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
2479 {
2480         struct btrfs_block_group_cache *cache;
2481
2482         maybe_lock_mutex(root);
2483         cache = btrfs_lookup_block_group(root->fs_info, start);
2484         if (!cache) {
2485                 printk(KERN_ERR "Unable to find block group for %Lu\n", start);
2486                 maybe_unlock_mutex(root);
2487                 return -ENOSPC;
2488         }
2489         btrfs_add_free_space(cache, start, len);
2490         maybe_unlock_mutex(root);
2491         return 0;
2492 }
2493
2494 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2495                                   struct btrfs_root *root,
2496                                   u64 num_bytes, u64 min_alloc_size,
2497                                   u64 empty_size, u64 hint_byte,
2498                                   u64 search_end, struct btrfs_key *ins,
2499                                   u64 data)
2500 {
2501         int ret;
2502         maybe_lock_mutex(root);
2503         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2504                                      empty_size, hint_byte, search_end, ins,
2505                                      data);
2506         update_reserved_extents(root, ins->objectid, ins->offset, 1);
2507         maybe_unlock_mutex(root);
2508         return ret;
2509 }
2510
2511 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2512                                          struct btrfs_root *root, u64 parent,
2513                                          u64 root_objectid, u64 ref_generation,
2514                                          u64 owner, u64 owner_offset,
2515                                          struct btrfs_key *ins)
2516 {
2517         int ret;
2518         int pending_ret;
2519         u64 super_used;
2520         u64 root_used;
2521         u64 num_bytes = ins->offset;
2522         u32 sizes[2];
2523         struct btrfs_fs_info *info = root->fs_info;
2524         struct btrfs_root *extent_root = info->extent_root;
2525         struct btrfs_extent_item *extent_item;
2526         struct btrfs_extent_ref *ref;
2527         struct btrfs_path *path;
2528         struct btrfs_key keys[2];
2529
2530         if (parent == 0)
2531                 parent = ins->objectid;
2532
2533         /* block accounting for super block */
2534         spin_lock_irq(&info->delalloc_lock);
2535         super_used = btrfs_super_bytes_used(&info->super_copy);
2536         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2537         spin_unlock_irq(&info->delalloc_lock);
2538
2539         /* block accounting for root item */
2540         root_used = btrfs_root_used(&root->root_item);
2541         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
2542
2543         if (root == extent_root) {
2544                 struct pending_extent_op *extent_op;
2545
2546                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2547                 BUG_ON(!extent_op);
2548
2549                 extent_op->type = PENDING_EXTENT_INSERT;
2550                 extent_op->bytenr = ins->objectid;
2551                 extent_op->num_bytes = ins->offset;
2552                 extent_op->parent = parent;
2553                 extent_op->orig_parent = 0;
2554                 extent_op->generation = ref_generation;
2555                 extent_op->orig_generation = 0;
2556                 extent_op->level = (int)owner;
2557
2558                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2559                                 ins->objectid + ins->offset - 1,
2560                                 EXTENT_LOCKED, GFP_NOFS);
2561                 set_state_private(&root->fs_info->extent_ins,
2562                                   ins->objectid, (unsigned long)extent_op);
2563                 goto update_block;
2564         }
2565
2566         memcpy(&keys[0], ins, sizeof(*ins));
2567         keys[1].objectid = ins->objectid;
2568         keys[1].type = BTRFS_EXTENT_REF_KEY;
2569         keys[1].offset = parent;
2570         sizes[0] = sizeof(*extent_item);
2571         sizes[1] = sizeof(*ref);
2572
2573         path = btrfs_alloc_path();
2574         BUG_ON(!path);
2575
2576         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2577                                        sizes, 2);
2578         BUG_ON(ret);
2579
2580         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                                      struct btrfs_extent_item);
2582         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2583         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2584                              struct btrfs_extent_ref);
2585
2586         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2587         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2588         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2589         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
2590         btrfs_set_ref_num_refs(path->nodes[0], ref, 1);
2591
2592         btrfs_mark_buffer_dirty(path->nodes[0]);
2593
2594         trans->alloc_exclude_start = 0;
2595         trans->alloc_exclude_nr = 0;
2596         btrfs_free_path(path);
2597         finish_current_insert(trans, extent_root);
2598         pending_ret = del_pending_extents(trans, extent_root);
2599
2600         if (ret)
2601                 goto out;
2602         if (pending_ret) {
2603                 ret = pending_ret;
2604                 goto out;
2605         }
2606
2607 update_block:
2608         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
2609         if (ret) {
2610                 printk(KERN_ERR "update block group failed for %Lu %Lu\n",
2611                        ins->objectid, ins->offset);
2612                 BUG();
2613         }
2614 out:
2615         return ret;
2616 }
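
/*
 * __btrfs_alloc_reserved_extent() inserts the extent item and its
 * first backref with a single btrfs_insert_empty_items() call using
 * two keys, so the two items land next to each other in the leaf;
 * when __free_extent() finds them still adjacent it drops both with
 * one btrfs_del_items() call (num_to_del == 2).
 */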
2617
2618 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2619                                 struct btrfs_root *root, u64 parent,
2620                                 u64 root_objectid, u64 ref_generation,
2621                                 u64 owner, u64 owner_offset,
2622                                 struct btrfs_key *ins)
2623 {
2624         int ret;
2625
2626         if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
2627                 return 0;
2628         maybe_lock_mutex(root);
2629         ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2630                                             root_objectid, ref_generation,
2631                                             owner, owner_offset, ins);
2632         update_reserved_extents(root, ins->objectid, ins->offset, 0);
2633         maybe_unlock_mutex(root);
2634         return ret;
2635 }
2636
2637 /*
2638  * this is used by the tree logging recovery code.  It records that
2639  * an extent has been allocated and makes sure to clear the free
2640  * space cache bits as well
2641  */
2642 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
2643                                 struct btrfs_root *root, u64 parent,
2644                                 u64 root_objectid, u64 ref_generation,
2645                                 u64 owner, u64 owner_offset,
2646                                 struct btrfs_key *ins)
2647 {
2648         int ret;
2649         struct btrfs_block_group_cache *block_group;
2650
2651         maybe_lock_mutex(root);
2652         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
2653         cache_block_group(root, block_group);
2654
2655         ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset);
2656         BUG_ON(ret);
2657         ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2658                                             root_objectid, ref_generation,
2659                                             owner, owner_offset, ins);
2660         maybe_unlock_mutex(root);
2661         return ret;
2662 }
2663
2664 /*
2665  * finds a free extent and does all the dirty work required for allocation.
2666  * The key for the allocated extent is returned through ins.
2668  *
2669  * returns 0 if everything worked, non-zero otherwise.
2670  */
2671 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2672                        struct btrfs_root *root,
2673                        u64 num_bytes, u64 parent, u64 min_alloc_size,
2674                        u64 root_objectid, u64 ref_generation,
2675                        u64 owner_objectid, u64 owner_offset,
2676                        u64 empty_size, u64 hint_byte,
2677                        u64 search_end, struct btrfs_key *ins, u64 data)
2678 {
2679         int ret;
2680
2681         maybe_lock_mutex(root);
2682
2683         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2684                                      min_alloc_size, empty_size, hint_byte,
2685                                      search_end, ins, data);
2686         BUG_ON(ret);
2687         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
2688                 ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2689                                         root_objectid, ref_generation,
2690                                         owner_objectid, owner_offset, ins);
2691                 BUG_ON(ret);
2692
2693         } else {
2694                 update_reserved_extents(root, ins->objectid, ins->offset, 1);
2695         }
2696         maybe_unlock_mutex(root);
2697         return ret;
2698 }
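
/*
 * Illustrative sketch (hypothetical caller, not from this file): a data
 * allocation passes data == 1 and lets the allocator pick the byte
 * range; num_bytes and file_pos here are placeholders:
 *
 *	struct btrfs_key ins;
 *	ret = btrfs_alloc_extent(trans, root, num_bytes, 0,
 *				 root->sectorsize, root->root_key.objectid,
 *				 trans->transid, inode->i_ino, file_pos,
 *				 0, 0, (u64)-1, &ins, 1);
 *
 * on success, ins.objectid and ins.offset describe the new extent.
 */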
2699
2700 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
2701                                             struct btrfs_root *root,
2702                                             u64 bytenr, u32 blocksize)
2703 {
2704         struct extent_buffer *buf;
2705
2706         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
2707         if (!buf)
2708                 return ERR_PTR(-ENOMEM);
2709         btrfs_set_header_generation(buf, trans->transid);
2710         btrfs_tree_lock(buf);
2711         clean_tree_block(trans, root, buf);
2712         btrfs_set_buffer_uptodate(buf);
2713         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2714                 set_extent_dirty(&root->dirty_log_pages, buf->start,
2715                          buf->start + buf->len - 1, GFP_NOFS);
2716         } else {
2717                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2718                          buf->start + buf->len - 1, GFP_NOFS);
2719         }
2720         trans->blocks_used++;
2721         return buf;
2722 }
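
/*
 * Note: tree log blocks are tracked in the per-root dirty_log_pages
 * tree above so the log can be written back independently of the
 * transaction-wide dirty_pages tree.
 */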
2723
2724 /*
2725  * helper function to allocate a block for a given tree
2726  * returns the tree buffer or an ERR_PTR on failure.
2727  */
2728 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2729                                              struct btrfs_root *root,
2730                                              u32 blocksize, u64 parent,
2731                                              u64 root_objectid,
2732                                              u64 ref_generation,
2733                                              int level,
2734                                              u64 hint,
2735                                              u64 empty_size)
2736 {
2737         struct btrfs_key ins;
2738         int ret;
2739         struct extent_buffer *buf;
2740
2741         ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
2742                                  root_objectid, ref_generation, level, 0,
2743                                  empty_size, hint, (u64)-1, &ins, 0);
2744         if (ret) {
2745                 BUG_ON(ret > 0);
2746                 return ERR_PTR(ret);
2747         }
2748
2749         buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
2750         return buf;
2751 }
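
/*
 * Illustrative usage sketch (hypothetical caller): failures come back
 * as an ERR_PTR, never NULL, so callers should do:
 *
 *	buf = btrfs_alloc_free_block(trans, root, blocksize, parent_start,
 *				     root_objectid, ref_generation, level,
 *				     hint, 0);
 *	if (IS_ERR(buf))
 *		return PTR_ERR(buf);
 */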
2752
2753 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
2754                         struct btrfs_root *root, struct extent_buffer *leaf)
2755 {
2756         u64 leaf_owner;
2757         u64 leaf_generation;
2758         struct btrfs_key key;
2759         struct btrfs_file_extent_item *fi;
2760         int i;
2761         int nritems;
2762         int ret;
2763
2764         BUG_ON(!btrfs_is_leaf(leaf));
2765         nritems = btrfs_header_nritems(leaf);
2766         leaf_owner = btrfs_header_owner(leaf);
2767         leaf_generation = btrfs_header_generation(leaf);
2768
2769         for (i = 0; i < nritems; i++) {
2770                 u64 disk_bytenr;
2771                 cond_resched();
2772
2773                 btrfs_item_key_to_cpu(leaf, &key, i);
2774                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2775                         continue;
2776                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2777                 if (btrfs_file_extent_type(leaf, fi) ==
2778                     BTRFS_FILE_EXTENT_INLINE)
2779                         continue;
2780                 /*
2781                  * FIXME make sure to insert a trans record that
2782                  * repeats the snapshot del on crash
2783                  */
2784                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2785                 if (disk_bytenr == 0)
2786                         continue;
2787
2788                 mutex_lock(&root->fs_info->alloc_mutex);
2789                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2790                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2791                                 leaf->start, leaf_owner, leaf_generation,
2792                                 key.objectid, key.offset, 0);
2793                 mutex_unlock(&root->fs_info->alloc_mutex);
2794                 BUG_ON(ret);
2795
2796                 atomic_inc(&root->fs_info->throttle_gen);
2797                 wake_up(&root->fs_info->transaction_throttle);
2798                 cond_resched();
2799         }
2800         return 0;
2801 }
2802
2803 static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
2804                                         struct btrfs_root *root,
2805                                         struct btrfs_leaf_ref *ref)
2806 {
2807         int i;
2808         int ret;
2809         struct btrfs_extent_info *info = ref->extents;
2810
2811         for (i = 0; i < ref->nritems; i++) {
2812                 mutex_lock(&root->fs_info->alloc_mutex);
2813                 ret = __btrfs_free_extent(trans, root, info->bytenr,
2814                                           info->num_bytes, ref->bytenr,
2815                                           ref->owner, ref->generation,
2816                                           info->objectid, info->offset, 0);
2817                 mutex_unlock(&root->fs_info->alloc_mutex);
2818
2819                 atomic_inc(&root->fs_info->throttle_gen);
2820                 wake_up(&root->fs_info->transaction_throttle);
2821                 cond_resched();
2822
2823                 BUG_ON(ret);
2824                 info++;
2825         }
2826
2827         return 0;
2828 }
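
/*
 * Note: the extent list in 'ref' was filled in when the leaf ref was
 * cached, so the file extents can be freed here without reading the
 * leaf back from disk; walk_down_tree() below takes this shortcut at
 * level 1 via btrfs_lookup_leaf_ref().
 */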
2829
2830 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2831                               u32 *refs)
2832 {
2833         int ret;
2834
2835         ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
2836         BUG_ON(ret);
2837
2838 #if 0 /* some debugging code in case we see problems here */
2839         /* if the refs count is one, it won't get increased again.  But
2840          * if the ref count is > 1, someone may be decreasing it at
2841          * the same time we are.
2842          */
2843         if (*refs != 1) {
2844                 struct extent_buffer *eb = NULL;
2845                 eb = btrfs_find_create_tree_block(root, start, len);
2846                 if (eb)
2847                         btrfs_tree_lock(eb);
2848
2849                 mutex_lock(&root->fs_info->alloc_mutex);
2850                 ret = lookup_extent_ref(NULL, root, start, len, refs);
2851                 BUG_ON(ret);
2852                 mutex_unlock(&root->fs_info->alloc_mutex);
2853
2854                 if (eb) {
2855                         btrfs_tree_unlock(eb);
2856                         free_extent_buffer(eb);
2857                 }
2858                 if (*refs == 1) {
2859                         printk("block %llu went down to one during drop_snap\n",
2860                                (unsigned long long)start);
2861                 }
2862
2863         }
2864 #endif
2865
2866         cond_resched();
2867         return ret;
2868 }
2869
2870 /*
2871  * helper function for drop_snapshot; this walks down the tree dropping ref
2872  * counts as it goes.
2873  */
2874 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2875                                    struct btrfs_root *root,
2876                                    struct btrfs_path *path, int *level)
2877 {
2878         u64 root_owner;
2879         u64 root_gen;
2880         u64 bytenr;
2881         u64 ptr_gen;
2882         struct extent_buffer *next;
2883         struct extent_buffer *cur;
2884         struct extent_buffer *parent;
2885         struct btrfs_leaf_ref *ref;
2886         u32 blocksize;
2887         int ret;
2888         u32 refs;
2889
2890         WARN_ON(*level < 0);
2891         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2892         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
2893                                 path->nodes[*level]->len, &refs);
2894         BUG_ON(ret);
2895         if (refs > 1)
2896                 goto out;
2897
2898         /*
2899          * walk down to the last node level and free all the leaves
2900          */
2901         while(*level >= 0) {
2902                 WARN_ON(*level < 0);
2903                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2904                 cur = path->nodes[*level];
2905
2906                 WARN_ON(btrfs_header_level(cur) != *level);
2908
2909                 if (path->slots[*level] >=
2910                     btrfs_header_nritems(cur))
2911                         break;
2912                 if (*level == 0) {
2913                         ret = btrfs_drop_leaf_ref(trans, root, cur);
2914                         BUG_ON(ret);
2915                         break;
2916                 }
2917                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2918                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2919                 blocksize = btrfs_level_size(root, *level - 1);
2920
2921                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
2922                 BUG_ON(ret);
2923                 if (refs != 1) {
2924                         parent = path->nodes[*level];
2925                         root_owner = btrfs_header_owner(parent);
2926                         root_gen = btrfs_header_generation(parent);
2927                         path->slots[*level]++;
2928
2929                         mutex_lock(&root->fs_info->alloc_mutex);
2930                         ret = __btrfs_free_extent(trans, root, bytenr,
2931                                                 blocksize, parent->start,
2932                                                 root_owner, root_gen, 0, 0, 1);
2933                         BUG_ON(ret);
2934                         mutex_unlock(&root->fs_info->alloc_mutex);
2935
2936                         atomic_inc(&root->fs_info->throttle_gen);
2937                         wake_up(&root->fs_info->transaction_throttle);
2938                         cond_resched();
2939
2940                         continue;
2941                 }
2942                 /*
2943                  * at this point, we have a single ref, and since the
2944                  * only place referencing this extent is a dead root,
2945                  * the reference count should never go higher.
2946                  * So, we don't need to check it again
2947                  */
2948                 if (*level == 1) {
2949                         ref = btrfs_lookup_leaf_ref(root, bytenr);
2950                         if (ref) {
2951                                 ret = cache_drop_leaf_ref(trans, root, ref);
2952                                 BUG_ON(ret);
2953                                 btrfs_remove_leaf_ref(root, ref);
2954                                 btrfs_free_leaf_ref(root, ref);
2955                                 *level = 0;
2956                                 break;
2957                         }
2958                         if (printk_ratelimit())
2959                                 printk("leaf ref miss for bytenr %llu\n",
2960                                        (unsigned long long)bytenr);
2961                 }
2962                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2963                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2964                         free_extent_buffer(next);
2965
2966                         next = read_tree_block(root, bytenr, blocksize,
2967                                                ptr_gen);
2968                         cond_resched();
2969 #if 0
2970                         /*
2971                          * this is a debugging check and can go away
2972                          * the ref should never go all the way down to 1
2973                          * at this point
2974                          */
2975                         ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2976                                                 &refs);
2977                         BUG_ON(ret);
2978                         WARN_ON(refs != 1);
2979 #endif
2980                 }
2981                 WARN_ON(*level <= 0);
2982                 if (path->nodes[*level-1])
2983                         free_extent_buffer(path->nodes[*level-1]);
2984                 path->nodes[*level-1] = next;
2985                 *level = btrfs_header_level(next);
2986                 path->slots[*level] = 0;
2987                 cond_resched();
2988         }
2989 out:
2990         WARN_ON(*level < 0);
2991         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2992
2993         if (path->nodes[*level] == root->node) {
2994                 parent = path->nodes[*level];
2995                 bytenr = path->nodes[*level]->start;
2996         } else {
2997                 parent = path->nodes[*level + 1];
2998                 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
2999         }
3000
3001         blocksize = btrfs_level_size(root, *level);
3002         root_owner = btrfs_header_owner(parent);
3003         root_gen = btrfs_header_generation(parent);
3004
3005         mutex_lock(&root->fs_info->alloc_mutex);
3006         ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
3007                                   parent->start, root_owner, root_gen,
3008                                   0, 0, 1);
3009         mutex_unlock(&root->fs_info->alloc_mutex);
3010         free_extent_buffer(path->nodes[*level]);
3011         path->nodes[*level] = NULL;
3012         *level += 1;
3013         BUG_ON(ret);
3014
3015         cond_resched();
3016         return 0;
3017 }
3018
3019 /*
3020  * helper for dropping snapshots.  This walks back up the tree in the path
3021  * to find the first node higher up where we haven't yet gone through
3022  * all the slots
3023  */
3024 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3025                                  struct btrfs_root *root,
3026                                  struct btrfs_path *path, int *level)
3027 {
3028         u64 root_owner;
3029         u64 root_gen;
3030         struct btrfs_root_item *root_item = &root->root_item;
3031         int i;
3032         int slot;
3033         int ret;
3034
3035         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
3036                 slot = path->slots[i];
3037                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3038                         struct extent_buffer *node;
3039                         struct btrfs_disk_key disk_key;
3040                         node = path->nodes[i];
3041                         path->slots[i]++;
3042                         *level = i;
3043                         WARN_ON(*level == 0);
3044                         btrfs_node_key(node, &disk_key, path->slots[i]);
3045                         memcpy(&root_item->drop_progress,
3046                                &disk_key, sizeof(disk_key));
3047                         root_item->drop_level = i;
3048                         return 0;
3049                 } else {
3050                         struct extent_buffer *parent;
3051                         if (path->nodes[*level] == root->node)
3052                                 parent = path->nodes[*level];
3053                         else
3054                                 parent = path->nodes[*level + 1];
3055
3056                         root_owner = btrfs_header_owner(parent);
3057                         root_gen = btrfs_header_generation(parent);
3058                         ret = btrfs_free_extent(trans, root,
3059                                                 path->nodes[*level]->start,
3060                                                 path->nodes[*level]->len,
3061                                                 parent->start,
3062                                                 root_owner, root_gen, 0, 0, 1);
3063                         BUG_ON(ret);
3064                         free_extent_buffer(path->nodes[*level]);
3065                         path->nodes[*level] = NULL;
3066                         *level = i + 1;
3067                 }
3068         }
3069         return 1;
3070 }
3071
3072 /*
3073  * drop the reference count on the tree rooted at 'root'.  This traverses
3074  * the tree freeing any blocks that have a ref count of zero after being
3075  * decremented.
3076  */
3077 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3078                         *root)
3079 {
3080         int ret = 0;
3081         int wret;
3082         int level;
3083         struct btrfs_path *path;
3084         int i;
3085         int orig_level;
3086         struct btrfs_root_item *root_item = &root->root_item;
3087
3088         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3089         path = btrfs_alloc_path();
3090         BUG_ON(!path);
3091
3092         level = btrfs_header_level(root->node);
3093         orig_level = level;
3094         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3095                 path->nodes[level] = root->node;
3096                 extent_buffer_get(root->node);
3097                 path->slots[level] = 0;
3098         } else {
3099                 struct btrfs_key key;
3100                 struct btrfs_disk_key found_key;
3101                 struct extent_buffer *node;
3102
3103                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3104                 level = root_item->drop_level;
3105                 path->lowest_level = level;
3106                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3107                 if (wret < 0) {
3108                         ret = wret;
3109                         goto out;
3110                 }
3111                 node = path->nodes[level];
3112                 btrfs_node_key(node, &found_key, path->slots[level]);
3113                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3114                                sizeof(found_key)));
3115                 /*
3116                  * unlock our path; this is safe because only this
3117                  * function is allowed to delete this snapshot
3118                  */
3119                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
3120                         if (path->nodes[i] && path->locks[i]) {
3121                                 path->locks[i] = 0;
3122                                 btrfs_tree_unlock(path->nodes[i]);
3123                         }
3124                 }
3125         }
3126         while(1) {
3127                 wret = walk_down_tree(trans, root, path, &level);
3128                 if (wret > 0)
3129                         break;
3130                 if (wret < 0)
3131                         ret = wret;
3132
3133                 wret = walk_up_tree(trans, root, path, &level);
3134                 if (wret > 0)
3135                         break;
3136                 if (wret < 0)
3137                         ret = wret;
3138                 if (trans->transaction->in_commit) {
3139                         ret = -EAGAIN;
3140                         break;
3141                 }
3142                 atomic_inc(&root->fs_info->throttle_gen);
3143                 wake_up(&root->fs_info->transaction_throttle);
3144         }
3145         for (i = 0; i <= orig_level; i++) {
3146                 if (path->nodes[i]) {
3147                         free_extent_buffer(path->nodes[i]);
3148                         path->nodes[i] = NULL;
3149                 }
3150         }
3151 out:
3152         btrfs_free_path(path);
3153         return ret;
3154 }
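
/*
 * Illustrative caller sketch (an assumption about the snapshot cleaner,
 * not code from this file): the drop runs across several transactions,
 * resuming from root_item->drop_progress, and retries while a pending
 * commit forces -EAGAIN (fs_info->drop_mutex held throughout):
 *
 *	for (;;) {
 *		trans = btrfs_start_transaction(tree_root, 1);
 *		ret = btrfs_drop_snapshot(trans, root);
 *		btrfs_end_transaction(trans, tree_root);
 *		if (ret != -EAGAIN)
 *			break;
 *	}
 */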
3155
3156 int btrfs_free_block_groups(struct btrfs_fs_info *info)
3157 {
3158         struct btrfs_block_group_cache *block_group;
3159         struct rb_node *n;
3160
3161         mutex_lock(&info->alloc_mutex);
3162         spin_lock(&info->block_group_cache_lock);
3163         while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
3164                 block_group = rb_entry(n, struct btrfs_block_group_cache,
3165                                        cache_node);
3166
3167                 spin_unlock(&info->block_group_cache_lock);
3168                 btrfs_remove_free_space_cache(block_group);
3169                 spin_lock(&info->block_group_cache_lock);
3170
3171                 rb_erase(&block_group->cache_node,
3172                          &info->block_group_cache_tree);
3173
3174                 spin_lock(&block_group->space_info->lock);
3175                 list_del(&block_group->list);
3176                 spin_unlock(&block_group->space_info->lock);
3177                 kfree(block_group);
3178         }
3179         spin_unlock(&info->block_group_cache_lock);
3180         mutex_unlock(&info->alloc_mutex);
3181         return 0;
3182 }
3183
3184 static unsigned long calc_ra(unsigned long start, unsigned long last,
3185                              unsigned long nr)
3186 {
3187         return min(last, start + nr - 1);
3188 }
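
/*
 * Worked example: calc_ra(10, 100, 32) == min(100, 10 + 32 - 1) == 41,
 * i.e. read ahead an nr-page window but never past the last index.
 */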
3189
3190 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
3191                                          u64 len)
3192 {
3193         u64 page_start;
3194         u64 page_end;
3195         unsigned long last_index;
3196         unsigned long i;
3197         struct page *page;
3198         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3199         struct file_ra_state *ra;
3200         unsigned long total_read = 0;
3201         unsigned long ra_pages;
3202         struct btrfs_ordered_extent *ordered;
3203         struct btrfs_trans_handle *trans;
3204
3205         ra = kzalloc(sizeof(*ra), GFP_NOFS);
             if (!ra)
                     return -ENOMEM;
3206
3207         mutex_lock(&inode->i_mutex);
3208         i = start >> PAGE_CACHE_SHIFT;
3209         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3210
3211         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
3212
3213         file_ra_state_init(ra, inode->i_mapping);
3214
3215         for (; i <= last_index; i++) {
3216                 if (total_read % ra_pages == 0) {
3217                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3218                                        calc_ra(i, last_index, ra_pages));
3219                 }
3220                 total_read++;
3221 again:
3222                 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
3223                         goto truncate_racing;
3224                 page = grab_cache_page(inode->i_mapping, i);
3225                 if (!page) {
3226                         goto out_unlock;
3227                 }
3228                 if (!PageUptodate(page)) {
3229                         btrfs_readpage(NULL, page);
3230                         lock_page(page);
3231                         if (!PageUptodate(page)) {
3232                                 unlock_page(page);
3233                                 page_cache_release(page);
3234                                 goto out_unlock;
3235                         }
3236                 }
3237                 wait_on_page_writeback(page);
3238
3239                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3240                 page_end = page_start + PAGE_CACHE_SIZE - 1;
3241                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3242
3243                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3244                 if (ordered) {
3245                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3246                         unlock_page(page);
3247                         page_cache_release(page);
3248                         btrfs_start_ordered_extent(inode, ordered, 1);
3249                         btrfs_put_ordered_extent(ordered);
3250                         goto again;
3251                 }
3252                 set_page_extent_mapped(page);
3253
3254                 /*
3255                  * make sure page_mkwrite is called for this page if userland
3256                  * wants to change it from mmap
3257                  */
3258                 clear_page_dirty_for_io(page);
3259
3260                 btrfs_set_extent_delalloc(inode, page_start, page_end);
3261                 set_page_dirty(page);
3262
3263                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3264                 unlock_page(page);
3265                 page_cache_release(page);
3266         }
3267
3268 out_unlock:
3269         /* we have to start the IO in order to get the ordered extents
3270          * instantiated.  This allows the relocation code to wait
3271          * for all the ordered extents to hit the disk.
3272          *
3273          * Otherwise, it would constantly loop over the same extents
3274          * because the old ones don't get deleted until the IO is
3275          * started.
3276          */
3277         btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1,
3278                                WB_SYNC_NONE);
3279         kfree(ra);
3280         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
3281         if (trans) {
3282                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
3283                 mark_inode_dirty(inode);
3284         }
3285         mutex_unlock(&inode->i_mutex);
3286         return 0;
3287
3288 truncate_racing:
3289         vmtruncate(inode, inode->i_size);
3290         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
3291                                            total_read);
3292         goto out_unlock;
3293 }
3294
3295 /*
3296  * The back references tell us which tree holds a ref on a block,
3297  * but it is possible for the tree root field in the reference to
3298  * reflect the original root before a snapshot was made.  In this
3299  * case we should search through all the children of a given root
3300  * to find potential holders of references on a block.
3301  *
3302  * Instead, we do something a little less fancy and just search
3303  * all the roots for a given key/block combination.
3304  */
3305 static int find_root_for_ref(struct btrfs_root *root,
3306                              struct btrfs_path *path,
3307                              struct btrfs_key *key0,
3308                              int level,
3309                              int file_key,
3310                              struct btrfs_root **found_root,
3311                              u64 bytenr)
3312 {
3313         struct btrfs_key root_location;
3314         struct btrfs_root *cur_root = *found_root;
3315         struct btrfs_file_extent_item *file_extent;
3316         u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
3317         u64 found_bytenr;
3318         int ret;
3319
3320         root_location.offset = (u64)-1;
3321         root_location.type = BTRFS_ROOT_ITEM_KEY;
3322         path->lowest_level = level;
3323         path->reada = 0;
3324         while(1) {
3325                 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
3326                 found_bytenr = 0;
3327                 if (ret == 0 && file_key) {
3328                         struct extent_buffer *leaf = path->nodes[0];
3329                         file_extent = btrfs_item_ptr(leaf, path->slots[0],
3330                                              struct btrfs_file_extent_item);
3331                         if (btrfs_file_extent_type(leaf, file_extent) ==
3332                             BTRFS_FILE_EXTENT_REG) {
3333                                 found_bytenr =
3334                                         btrfs_file_extent_disk_bytenr(leaf,
3335                                                                file_extent);
3336                         }
3337                 } else if (!file_key) {
3338                         if (path->nodes[level])
3339                                 found_bytenr = path->nodes[level]->start;
3340                 }
3341
3342                 btrfs_release_path(cur_root, path);
3343
3344                 if (found_bytenr == bytenr) {
3345                         *found_root = cur_root;
3346                         ret = 0;
3347                         goto out;
3348                 }
3349                 ret = btrfs_search_root(root->fs_info->tree_root,
3350                                         root_search_start, &root_search_start);
3351                 if (ret)
3352                         break;
3353
3354                 root_location.objectid = root_search_start;
3355                 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
3356                                                       &root_location);
3357                 if (!cur_root) {
3358                         ret = 1;
3359                         break;
3360                 }
3361         }
3362 out:
3363         path->lowest_level = 0;
3364         return ret;
3365 }
3366
3367 /*
3368  * note, this releases the path
3369  */
3370 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
3371                                   struct btrfs_path *path,
3372                                   struct btrfs_key *extent_key,
3373                                   u64 *last_file_objectid,
3374                                   u64 *last_file_offset,
3375                                   u64 *last_file_root,
3376                                   u64 last_extent)
3377 {
3378         struct inode *inode;
3379         struct btrfs_root *found_root;
3380         struct btrfs_key root_location;
3381         struct btrfs_key found_key;
3382         struct btrfs_extent_ref *ref;
3383         u64 ref_root;
3384         u64 ref_gen;
3385         u64 ref_objectid;
3386         u64 ref_offset;
3387         int ret;
3388         int level;
3389
3390         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
3391
3392         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
3393                              struct btrfs_extent_ref);
3394         ref_root = btrfs_ref_root(path->nodes[0], ref);
3395         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
3396         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
3397         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
3398         btrfs_release_path(extent_root, path);
3399
3400         root_location.objectid = ref_root;
3401         if (ref_gen == 0)
3402                 root_location.offset = 0;
3403         else
3404                 root_location.offset = (u64)-1;
3405         root_location.type = BTRFS_ROOT_ITEM_KEY;
3406
3407         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
3408                                                 &root_location);
3409         BUG_ON(!found_root);
3410         mutex_unlock(&extent_root->fs_info->alloc_mutex);
3411
3412         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
3413                 found_key.objectid = ref_objectid;
3414                 found_key.type = BTRFS_EXTENT_DATA_KEY;
3415                 found_key.offset = ref_offset;
3416                 level = 0;
3417
3418                 if (last_extent == extent_key->objectid &&
3419                     *last_file_objectid == ref_objectid &&
3420                     *last_file_offset == ref_offset &&
3421                     *last_file_root == ref_root)
3422                         goto out;
3423
3424                 ret = find_root_for_ref(extent_root, path, &found_key,
3425                                         level, 1, &found_root,
3426                                         extent_key->objectid);
3427
3428                 if (ret)
3429                         goto out;
3430
3431                 if (last_extent == extent_key->objectid &&
3432                     *last_file_objectid == ref_objectid &&
3433                     *last_file_offset == ref_offset &&
3434                     *last_file_root == ref_root)
3435                         goto out;
3436
3437                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
3438                                           ref_objectid, found_root);
                     if (!inode)
                             goto out;
3439                 if (inode->i_state & I_NEW) {
3440                         /* the inode and parent dir are two different roots */
3441                         BTRFS_I(inode)->root = found_root;
3442                         BTRFS_I(inode)->location.objectid = ref_objectid;
3443                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
3444                         BTRFS_I(inode)->location.offset = 0;
3445                         btrfs_read_locked_inode(inode);
3446                         unlock_new_inode(inode);
3447
3448                 }
3449                 /* this can happen if the reference is not against
3450                  * the latest version of the tree root
3451                  */
3452                 if (is_bad_inode(inode))
3453                         goto out;
3454
3455                 *last_file_objectid = inode->i_ino;
3456                 *last_file_root = found_root->root_key.objectid;
3457                 *last_file_offset = ref_offset;
3458
3459                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
3460                 iput(inode);
3461         } else {
3462                 struct btrfs_trans_handle *trans;
3463                 struct extent_buffer *eb;
3464                 int needs_lock = 0;
3465
3466                 eb = read_tree_block(found_root, extent_key->objectid,
3467                                      extent_key->offset, 0);
3468                 btrfs_tree_lock(eb);
3469                 level = btrfs_header_level(eb);
3470
3471                 if (level == 0)
3472                         btrfs_item_key_to_cpu(eb, &found_key, 0);
3473                 else
3474                         btrfs_node_key_to_cpu(eb, &found_key, 0);
3475
3476                 btrfs_tree_unlock(eb);
3477                 free_extent_buffer(eb);
3478
3479                 ret = find_root_for_ref(extent_root, path, &found_key,
3480                                         level, 0, &found_root,
3481                                         extent_key->objectid);
3482
3483                 if (ret)
3484                         goto out;
3485
3486                 /*
3487                  * right here almost anything could happen to our key,
3488                  * but that's ok.  The cow below will either relocate it
3489                  * or someone else will have relocated it.  Either way,
3490                  * it is in a different spot than it was before and
3491                  * we're happy.
3492                  */
3493
3494                 trans = btrfs_start_transaction(found_root, 1);
3495
3496                 if (found_root == extent_root->fs_info->extent_root ||
3497                     found_root == extent_root->fs_info->chunk_root ||
3498                     found_root == extent_root->fs_info->dev_root) {
3499                         needs_lock = 1;
3500                         mutex_lock(&extent_root->fs_info->alloc_mutex);
3501                 }
3502
3503                 path->lowest_level = level;
3504                 path->reada = 2;
3505                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
3506                                         0, 1);
3507                 path->lowest_level = 0;
3508                 btrfs_release_path(found_root, path);
3509
3510                 if (found_root == found_root->fs_info->extent_root)
3511                         btrfs_extent_post_op(trans, found_root);
3512                 if (needs_lock)
3513                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
3514
3515                 btrfs_end_transaction(trans, found_root);
3516
3517         }
3518 out:
3519         mutex_lock(&extent_root->fs_info->alloc_mutex);
3520         return 0;
3521 }
3522
3523 static int noinline del_extent_zero(struct btrfs_root *extent_root,
3524                                     struct btrfs_path *path,
3525                                     struct btrfs_key *extent_key)
3526 {
3527         int ret;
3528         struct btrfs_trans_handle *trans;
3529
3530         trans = btrfs_start_transaction(extent_root, 1);
3531         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
3532         if (ret > 0) {
3533                 ret = -EIO;
3534                 goto out;
3535         }
3536         if (ret < 0)
3537                 goto out;
3538         ret = btrfs_del_item(trans, extent_root, path);
3539 out:
3540         btrfs_end_transaction(trans, extent_root);
3541         return ret;
3542 }
3543
3544 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
3545                                         struct btrfs_path *path,
3546                                         struct btrfs_key *extent_key)
3547 {
3548         struct btrfs_key key;
3549         struct btrfs_key found_key;
3550         struct extent_buffer *leaf;
3551         u64 last_file_objectid = 0;
3552         u64 last_file_root = 0;
3553         u64 last_file_offset = (u64)-1;
3554         u64 last_extent = 0;
3555         u32 nritems;
3556         u32 item_size;
3557         int ret = 0;
3558
3559         if (extent_key->objectid == 0) {
3560                 ret = del_extent_zero(extent_root, path, extent_key);
3561                 goto out;
3562         }
3563         key.objectid = extent_key->objectid;
3564         key.type = BTRFS_EXTENT_REF_KEY;
3565         key.offset = 0;
3566
3567         while(1) {
3568                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3569
3570                 if (ret < 0)
3571                         goto out;
3572
3573                 ret = 0;
3574                 leaf = path->nodes[0];
3575                 nritems = btrfs_header_nritems(leaf);
3576                 if (path->slots[0] == nritems) {
3577                         ret = btrfs_next_leaf(extent_root, path);
3578                         if (ret > 0) {
3579                                 ret = 0;
3580                                 goto out;
3581                         }
3582                         if (ret < 0)
3583                                 goto out;
3584                         leaf = path->nodes[0];
3585                 }
3586
3587                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3588                 if (found_key.objectid != extent_key->objectid) {
3589                         break;
3590                 }
3591
3592                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
3593                         break;
3594                 }
3595
3596                 key.offset = found_key.offset + 1;
3597                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3598
3599                 ret = relocate_one_reference(extent_root, path, extent_key,
3600                                              &last_file_objectid,
3601                                              &last_file_offset,
3602                                              &last_file_root, last_extent);
3603                 if (ret)
3604                         goto out;
3605                 last_extent = extent_key->objectid;
3606         }
3607         ret = 0;
3608 out:
3609         btrfs_release_path(extent_root, path);
3610         return ret;
3611 }
3612
3613 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
3614 {
3615         u64 num_devices;
3616         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
3617                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
3618
3619         num_devices = root->fs_info->fs_devices->num_devices;
3620         if (num_devices == 1) {
3621                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3622                 stripped = flags & ~stripped;
3623
3624                 /* turn raid0 into single device chunks */
3625                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
3626                         return stripped;
3627
3628                 /* turn mirroring into duplication */
3629                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3630                              BTRFS_BLOCK_GROUP_RAID10))
3631                         return stripped | BTRFS_BLOCK_GROUP_DUP;
3632                 return flags;
3633         } else {
3634                 /* they already had raid on here, just return */
3635                 if (flags & stripped)
3636                         return flags;
3637
3638                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3639                 stripped = flags & ~stripped;
3640
3641                 /* switch duplicated blocks with raid1 */
3642                 if (flags & BTRFS_BLOCK_GROUP_DUP)
3643                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
3644
3645                 /* turn single device chunks into raid0 */
3646                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
3647         }
3648         return flags;
3649 }
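
/*
 * Worked example (illustrative): with num_devices == 1,
 *
 *	update_block_group_flags(root, BTRFS_BLOCK_GROUP_RAID1)
 *		== BTRFS_BLOCK_GROUP_DUP
 *
 * while with several devices,
 *
 *	update_block_group_flags(root, BTRFS_BLOCK_GROUP_DUP)
 *		== BTRFS_BLOCK_GROUP_RAID1
 *
 * i.e. mirroring degrades to duplication on a single device and is
 * promoted back to raid1 once more devices are available.
 */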
3650
3651 int __alloc_chunk_for_shrink(struct btrfs_root *root,
3652                      struct btrfs_block_group_cache *shrink_block_group,
3653                      int force)
3654 {
3655         struct btrfs_trans_handle *trans;
3656         u64 new_alloc_flags;
3657         u64 calc;
3658
3659         spin_lock(&shrink_block_group->lock);
3660         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
3661                 spin_unlock(&shrink_block_group->lock);
3662                 mutex_unlock(&root->fs_info->alloc_mutex);
3663
3664                 trans = btrfs_start_transaction(root, 1);
3665                 mutex_lock(&root->fs_info->alloc_mutex);
3666                 spin_lock(&shrink_block_group->lock);
3667
3668                 new_alloc_flags = update_block_group_flags(root,
3669                                                    shrink_block_group->flags);
3670                 if (new_alloc_flags != shrink_block_group->flags) {
3671                         calc =
3672                              btrfs_block_group_used(&shrink_block_group->item);
3673                 } else {
3674                         calc = shrink_block_group->key.offset;
3675                 }
3676                 spin_unlock(&shrink_block_group->lock);
3677
3678                 do_chunk_alloc(trans, root->fs_info->extent_root,
3679                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
3680
3681                 mutex_unlock(&root->fs_info->alloc_mutex);
3682                 btrfs_end_transaction(trans, root);
3683                 mutex_lock(&root->fs_info->alloc_mutex);
3684         } else
3685                 spin_unlock(&shrink_block_group->lock);
3686         return 0;
3687 }
3688
3689 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
3690 {
3691         struct btrfs_trans_handle *trans;
3692         struct btrfs_root *tree_root = root->fs_info->tree_root;
3693         struct btrfs_path *path;
3694         u64 cur_byte;
3695         u64 total_found;
3696         u64 shrink_last_byte;
3697         struct btrfs_block_group_cache *shrink_block_group;
3698         struct btrfs_key key;
3699         struct btrfs_key found_key;
3700         struct extent_buffer *leaf;
3701         u32 nritems;
3702         int ret;
3703         int progress;
3704
3705         mutex_lock(&root->fs_info->alloc_mutex);
3706         shrink_block_group = btrfs_lookup_block_group(root->fs_info,
3707                                                       shrink_start);
3708         BUG_ON(!shrink_block_group);
3709
3710         shrink_last_byte = shrink_block_group->key.objectid +
3711                 shrink_block_group->key.offset;
3712
3713         shrink_block_group->space_info->total_bytes -=
3714                 shrink_block_group->key.offset;
3715         path = btrfs_alloc_path();
             BUG_ON(!path);
3716         root = root->fs_info->extent_root;
3717         path->reada = 2;
3718
3719         printk("btrfs relocating block group %llu flags %llu\n",
3720                (unsigned long long)shrink_start,
3721                (unsigned long long)shrink_block_group->flags);
3722
3723         __alloc_chunk_for_shrink(root, shrink_block_group, 1);
3724
3725 again:
3726
3727         shrink_block_group->ro = 1;
3728
3729         total_found = 0;
3730         progress = 0;
3731         key.objectid = shrink_start;
3732         key.offset = 0;
3733         key.type = 0;
3734         cur_byte = key.objectid;
3735
3736         mutex_unlock(&root->fs_info->alloc_mutex);
3737
3738         btrfs_start_delalloc_inodes(root);
3739         btrfs_wait_ordered_extents(tree_root, 0);
3740
3741         mutex_lock(&root->fs_info->alloc_mutex);
3742
3743         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3744         if (ret < 0)
3745                 goto out;
3746
3747         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
3748         if (ret < 0)
3749                 goto out;
3750
3751         if (ret == 0) {
3752                 leaf = path->nodes[0];
3753                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3754                 if (found_key.objectid + found_key.offset > shrink_start &&
3755                     found_key.objectid < shrink_last_byte) {
3756                         cur_byte = found_key.objectid;
3757                         key.objectid = cur_byte;
3758                 }
3759         }
3760         btrfs_release_path(root, path);
3761
3762         while(1) {
3763                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3764                 if (ret < 0)
3765                         goto out;
3766
3767 next:
3768                 leaf = path->nodes[0];
3769                 nritems = btrfs_header_nritems(leaf);
3770                 if (path->slots[0] >= nritems) {
3771                         ret = btrfs_next_leaf(root, path);
3772                         if (ret < 0)
3773                                 goto out;
3774                         if (ret == 1) {
3775                                 ret = 0;
3776                                 break;
3777                         }
3778                         leaf = path->nodes[0];
3779                         nritems = btrfs_header_nritems(leaf);
3780                 }
3781
3782                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3783
3784                 if (found_key.objectid >= shrink_last_byte)
3785                         break;
3786
3787                 if (progress && need_resched()) {
3788                         memcpy(&key, &found_key, sizeof(key));
3789                         cond_resched();
3790                         btrfs_release_path(root, path);
3791                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
3792                         progress = 0;
3793                         goto next;
3794                 }
3795                 progress = 1;
3796
3797                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
3798                     found_key.objectid + found_key.offset <= cur_byte) {
3799                         memcpy(&key, &found_key, sizeof(key));
3800                         key.offset++;
3801                         path->slots[0]++;
3802                         goto next;
3803                 }
3804
3805                 total_found++;
3806                 cur_byte = found_key.objectid + found_key.offset;
3807                 key.objectid = cur_byte;
3808                 btrfs_release_path(root, path);
3809                 ret = relocate_one_extent(root, path, &found_key);
3810                 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
3811         }
3812
3813         btrfs_release_path(root, path);
3814
3815         if (total_found > 0) {
3816                 printk("btrfs relocate found %llu last extent was %llu\n",
3817                        (unsigned long long)total_found,
3818                        (unsigned long long)found_key.objectid);
3819                 mutex_unlock(&root->fs_info->alloc_mutex);
3820                 trans = btrfs_start_transaction(tree_root, 1);
3821                 btrfs_commit_transaction(trans, tree_root);
3822
3823                 btrfs_clean_old_snapshots(tree_root);
3824
3825                 btrfs_start_delalloc_inodes(root);
3826                 btrfs_wait_ordered_extents(tree_root, 0);
3827
3828                 trans = btrfs_start_transaction(tree_root, 1);
3829                 btrfs_commit_transaction(trans, tree_root);
3830                 mutex_lock(&root->fs_info->alloc_mutex);
3831                 goto again;
3832         }
3833
3834         /*
3835          * we've freed all the extents, now remove the block
3836          * group item from the tree
3837          */
3838         mutex_unlock(&root->fs_info->alloc_mutex);
3839
3840         trans = btrfs_start_transaction(root, 1);
3841
3842         mutex_lock(&root->fs_info->alloc_mutex);
3843         memcpy(&key, &shrink_block_group->key, sizeof(key));
3844
3845         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3846         if (ret > 0)
3847                 ret = -EIO;
3848         if (ret < 0) {
3849                 btrfs_end_transaction(trans, root);
3850                 goto out;
3851         }
3852
3853         spin_lock(&root->fs_info->block_group_cache_lock);
3854         rb_erase(&shrink_block_group->cache_node,
3855                  &root->fs_info->block_group_cache_tree);
3856         spin_unlock(&root->fs_info->block_group_cache_lock);
3857
3858         ret = btrfs_remove_free_space(shrink_block_group, key.objectid,
3859                                       key.offset);
3860         if (ret) {
3861                 btrfs_end_transaction(trans, root);
3862                 goto out;
3863         }
3864         /*
3865         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3866         kfree(shrink_block_group);
3867         */
3868
3869         btrfs_del_item(trans, root, path);
3870         btrfs_release_path(root, path);
3871         mutex_unlock(&root->fs_info->alloc_mutex);
3872         btrfs_commit_transaction(trans, root);
3873
3874         mutex_lock(&root->fs_info->alloc_mutex);
3875
3876         /* the code to unpin extents might set a few bits in the free
3877          * space cache for this range again
3878          */
3879         /* XXX? */
3880         ret = btrfs_remove_free_space(shrink_block_group, key.objectid,
3881                                       key.offset);
3882 out:
3883         btrfs_free_path(path);
3884         mutex_unlock(&root->fs_info->alloc_mutex);
3885         return ret;
3886 }
3887
3888 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3889                            struct btrfs_key *key)
3890 {
3891         int ret = 0;
3892         struct btrfs_key found_key;
3893         struct extent_buffer *leaf;
3894         int slot;
3895
3896         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3897         if (ret < 0)
3898                 goto out;
3899
3900         while(1) {
3901                 slot = path->slots[0];
3902                 leaf = path->nodes[0];
3903                 if (slot >= btrfs_header_nritems(leaf)) {
3904                         ret = btrfs_next_leaf(root, path);
3905                         if (ret == 0)
3906                                 continue;
3907                         if (ret < 0)
3908                                 goto out;
3909                         break;
3910                 }
3911                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3912
3913                 if (found_key.objectid >= key->objectid &&
3914                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3915                         ret = 0;
3916                         goto out;
3917                 }
3918                 path->slots[0]++;
3919         }
3920         ret = -ENOENT;
3921 out:
3922         return ret;
3923 }
3924
3925 int btrfs_read_block_groups(struct btrfs_root *root)
3926 {
3927         struct btrfs_path *path;
3928         int ret;
3929         struct btrfs_block_group_cache *cache;
3930         struct btrfs_fs_info *info = root->fs_info;
3931         struct btrfs_space_info *space_info;
3932         struct btrfs_key key;
3933         struct btrfs_key found_key;
3934         struct extent_buffer *leaf;
3935
3936         root = info->extent_root;
3937         key.objectid = 0;
3938         key.offset = 0;
3939         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3940         path = btrfs_alloc_path();
3941         if (!path)
3942                 return -ENOMEM;
3943
3944         mutex_lock(&root->fs_info->alloc_mutex);
3945         while(1) {
3946                 ret = find_first_block_group(root, path, &key);
3947                 if (ret > 0) {
3948                         ret = 0;
3949                         goto error;
3950                 }
3951                 if (ret != 0)
3952                         goto error;
3953
3954                 leaf = path->nodes[0];
3955                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3956                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3957                 if (!cache) {
3958                         ret = -ENOMEM;
3959                         break;
3960                 }
3961
3962                 spin_lock_init(&cache->lock);
3963                 INIT_LIST_HEAD(&cache->list);
3964                 read_extent_buffer(leaf, &cache->item,
3965                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3966                                    sizeof(cache->item));
3967                 memcpy(&cache->key, &found_key, sizeof(found_key));
3968
3969                 key.objectid = found_key.objectid + found_key.offset;
3970                 btrfs_release_path(root, path);
3971                 cache->flags = btrfs_block_group_flags(&cache->item);
3972
3973                 ret = update_space_info(info, cache->flags, found_key.offset,
3974                                         btrfs_block_group_used(&cache->item),
3975                                         &space_info);
3976                 BUG_ON(ret);
3977                 cache->space_info = space_info;
3978                 spin_lock(&space_info->lock);
3979                 list_add(&cache->list, &space_info->block_groups);
3980                 spin_unlock(&space_info->lock);
3981
3982                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
3983                 BUG_ON(ret);
3984         }
3985         ret = 0;
3986 error:
3987         btrfs_free_path(path);
3988         mutex_unlock(&root->fs_info->alloc_mutex);
3989         return ret;
3990 }
3991
3992 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3993                            struct btrfs_root *root, u64 bytes_used,
3994                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3995                            u64 size)
3996 {
3997         int ret;
3998         struct btrfs_root *extent_root;
3999         struct btrfs_block_group_cache *cache;
4000
4001         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
4002         extent_root = root->fs_info->extent_root;
4003
4004         root->fs_info->last_trans_new_blockgroup = trans->transid;
4005
4006         cache = kzalloc(sizeof(*cache), GFP_NOFS);
4007         if (!cache)
4008                 return -ENOMEM;
4009
4010         cache->key.objectid = chunk_offset;
4011         cache->key.offset = size;
4012         spin_lock_init(&cache->lock);
4013         INIT_LIST_HEAD(&cache->list);
4014         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
4015
4016         btrfs_set_block_group_used(&cache->item, bytes_used);
4017         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
4018         cache->flags = type;
4019         btrfs_set_block_group_flags(&cache->item, type);
4020
4021         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
4022                                 &cache->space_info);
4023         BUG_ON(ret);
4024         spin_lock(&cache->space_info->lock);
4025         list_add(&cache->list, &cache->space_info->block_groups);
4026         spin_unlock(&cache->space_info->lock);
4027
4028         ret = btrfs_add_block_group_cache(root->fs_info, cache);
4029         BUG_ON(ret);
4030
4031         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
4032                                 sizeof(cache->item));
4033         BUG_ON(ret);
4034
4035         finish_current_insert(trans, extent_root);
4036         ret = del_pending_extents(trans, extent_root);
4037         BUG_ON(ret);
4038         set_avail_alloc_bits(extent_root->fs_info, type);
4039
4040         return 0;
4041 }
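
/*
 * Illustrative caller sketch (an assumption modeled on the chunk
 * allocator, not code from this file): after a new chunk is mapped,
 * its block group would be recorded with:
 *
 *	ret = btrfs_make_block_group(trans, extent_root, 0, flags,
 *				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *				     chunk_offset, chunk_size);
 */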