Btrfs: fix sleep with spinlock held during unmount
fs/btrfs/extent-tree.c
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include "hash.h"
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "ref-cache.h"

#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2

struct pending_extent_op {
	int type;
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 orig_parent;
	u64 generation;
	u64 orig_generation;
	int level;
};

static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
			 struct btrfs_block_group_cache *hint,
			 u64 search_start, int data, int owner);

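/*
 * helpers for taking the fs-wide alloc_mutex.  The extent, chunk and dev
 * roots are skipped here, presumably because callers operating on those
 * roots already hold alloc_mutex; every other root takes it explicitly.
 */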
void maybe_lock_mutex(struct btrfs_root *root)
{
	if (root != root->fs_info->extent_root &&
	    root != root->fs_info->chunk_root &&
	    root != root->fs_info->dev_root) {
		mutex_lock(&root->fs_info->alloc_mutex);
	}
}

void maybe_unlock_mutex(struct btrfs_root *root)
{
	if (root != root->fs_info->extent_root &&
	    root != root->fs_info->chunk_root &&
	    root != root->fs_info->dev_root) {
		mutex_unlock(&root->fs_info->alloc_mutex);
	}
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);
	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

/*
 * this is only called by cache_block_group.  Since we could have freed
 * extents, we need to check the pinned_extents for any extents that can't
 * be used yet, because their free space will be released as soon as the
 * transaction commits.
 */
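/*
 * For example (illustrative numbers only): caching the byte range
 * [0, 100) while the extent [30, 40] is pinned adds [0, 30) and
 * [41, 100) as free space; the pinned bytes become usable only after
 * the transaction commits.
 */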
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(&info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		if (extent_start == start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			ret = btrfs_add_free_space(block_group, start, size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return 0;
}

static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last = 0;
	u64 first_free;
	int found = 0;

	if (!block_group)
		return 0;

	root = root->fs_info->extent_root;

	if (block_group->cached)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	/*
	 * we get into deadlocks with paths held by callers of this function.
	 * since the alloc_mutex is protecting things right now, just
	 * skip the locking here
	 */
	path->skip_locking = 1;
	first_free = max_t(u64, block_group->key.objectid,
			   BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
	key.objectid = block_group->key.objectid;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;
	ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
	if (ret < 0)
		goto err;
	if (ret == 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid + key.offset > first_free)
			first_free = key.objectid + key.offset;
	}
	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			if (ret == 0)
				continue;
			else
				break;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
				last = first_free;
				found = 1;
			}

			add_new_free_space(block_group, root->fs_info, last,
					   key.objectid);

			last = key.objectid + key.offset;
		}
next:
		path->slots[0]++;
	}

	if (!found)
		last = first_free;

	add_new_free_space(block_group, root->fs_info, last,
			   block_group->key.objectid +
			   block_group->key.offset);

	block_group->cached = 1;
	ret = 0;
err:
	btrfs_free_path(path);
	return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_first_block_group(
					struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
					struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

static int noinline find_free_space(struct btrfs_root *root,
				    struct btrfs_block_group_cache **cache_ret,
				    u64 *start_ret, u64 num, int data)
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
	struct btrfs_free_space *info = NULL;
	u64 last;
	u64 total_fs_bytes;
	u64 search_start = *start_ret;

	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);

	if (!cache)
		goto out;

	last = max(search_start, cache->key.objectid);

again:
	ret = cache_block_group(root, cache);
	if (ret)
		goto out;

	if (cache->ro || !block_group_bits(cache, data))
		goto new_group;

	info = btrfs_find_free_space(cache, last, num);
	if (info) {
		*start_ret = info->offset;
		return 0;
	}

new_group:
	last = cache->key.objectid + cache->key.offset;

	cache = btrfs_lookup_first_block_group(root->fs_info, last);
	if (!cache || cache->key.objectid >= total_fs_bytes)
		goto out;

	*cache_ret = cache;
	goto again;

out:
	return -ENOSPC;
}

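/*
 * e.g. div_factor(8192, 9) == 7372, i.e. 90% of the input rounded down;
 * a factor of 10 returns the input unchanged.
 */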
static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct list_head *cur;
	struct btrfs_space_info *found;
	list_for_each(cur, head) {
		found = list_entry(cur, struct btrfs_space_info, list);
		if (found->flags == flags)
			return found;
	}
	return NULL;
}

static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
			 struct btrfs_block_group_cache *hint,
			 u64 search_start, int data, int owner)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *sinfo;
	u64 used;
	u64 last = 0;
	u64 free_check;
	int full_search = 0;
	int factor = 10;
	int wrapped = 0;

	if (data & BTRFS_BLOCK_GROUP_METADATA)
		factor = 9;

	if (search_start) {
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_first_block_group(info, search_start);
		if (shint && block_group_bits(shint, data) && !shint->ro) {
			spin_lock(&shint->lock);
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
				spin_unlock(&shint->lock);
				return shint;
			}
			spin_unlock(&shint->lock);
		}
	}
	if (hint && !hint->ro && block_group_bits(hint, data)) {
		spin_lock(&hint->lock);
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
			spin_unlock(&hint->lock);
			return hint;
		}
		spin_unlock(&hint->lock);
		last = hint->key.objectid + hint->key.offset;
	} else {
		if (hint)
			last = max(hint->key.objectid, search_start);
		else
			last = search_start;
	}
	sinfo = __find_space_info(root->fs_info, data);
	if (!sinfo)
		goto found;
again:
	while (1) {
		struct list_head *l;

		cache = NULL;

		spin_lock(&sinfo->lock);
		list_for_each(l, &sinfo->block_groups) {
			struct btrfs_block_group_cache *entry;
			entry = list_entry(l, struct btrfs_block_group_cache,
					   list);
			if ((entry->key.objectid >= last) &&
			    (!cache || (entry->key.objectid <
					cache->key.objectid)))
				cache = entry;
		}
		spin_unlock(&sinfo->lock);

		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (!cache->ro && block_group_bits(cache, data)) {
			free_check = div_factor(cache->key.offset, factor);
			if (used + cache->pinned < free_check) {
				found_group = cache;
				spin_unlock(&cache->lock);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		cond_resched();
	}
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return found_group;
}

struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
						 *hint, u64 search_start,
						 int data, int owner)
{
	struct btrfs_block_group_cache *ret;
	ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
	return ret;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	maybe_lock_mutex(root);
	key.objectid = start;
	key.offset = len;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	maybe_unlock_mutex(root);
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - offset in the file corresponding to the key holding the reference
 * - number of references held by the parent node (always 1 for tree blocks)
 *
 * A btree leaf may hold multiple references to a file extent. In most cases,
 * these references are from the same file and the corresponding offsets
 * inside the file are close together. The inode objectid and offset in file
 * are just hints; they tell us where in the btree the references can be
 * found and when we can stop searching.
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, offset in file, 1)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks similar to the create case, but trans->transid will
 * be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid, offset in file,
 *      number of references in the leaf)
 *
 * Because the inode objectid and offset in file are just hints, they are not
 * used when backrefs are deleted. When a file extent is removed either
 * during snapshot deletion or file truncation, we find the corresponding
 * back reference and check the following fields.
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf))
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * When a tree block is created, back references are inserted:
 *
 * (root->root_key.objectid, trans->transid, level, 0, 1)
 *
 * When a tree block is cow'd, new back references are added for all the
 * blocks it points to. If the tree block isn't in a reference counted root,
 * the old back references are removed. These new back references are of
 * the form (trans->transid will have increased since creation):
 *
 * (root->root_key.objectid, trans->transid, level, 0, 1)
 *
 * When a backref is being deleted, the following fields are checked:
 *
 * if backref was for a tree root:
 *     (btrfs_header_owner(itself), btrfs_header_generation(itself))
 * else
 *     (btrfs_header_owner(parent), btrfs_header_generation(parent))
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent, the key
 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 * byte of the parent extent. If an extent is a tree root, the key offset
 * is set to the key objectid.
 */
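
/*
 * A concrete example of the rules above (values are illustrative only):
 * a file extent starting at byte 12582912, referenced by inode 257 at
 * file offset 0 from a leaf of subvolume 5 that starts at byte 29360128,
 * is found via the key (12582912, BTRFS_EXTENT_REF_KEY, 29360128) and
 * carries the ref fields (5, trans->transid, 257, 0, 1).
 */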

static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, u64 bytenr,
					  u64 parent, u64 ref_root,
					  u64 ref_generation, int del)
{
	struct btrfs_key key;
	struct btrfs_extent_ref *ref;
	struct extent_buffer *leaf;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;

	ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
	if (btrfs_ref_root(leaf, ref) != ref_root ||
	    btrfs_ref_generation(leaf, ref) != ref_generation) {
		ret = -EIO;
		WARN_ON(1);
		goto out;
	}
	ret = 0;
out:
	return ret;
}

static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 ref_root, u64 ref_generation,
					  u64 owner_objectid, u64 owner_offset)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *ref;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
	if (ret == 0) {
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		btrfs_set_ref_root(leaf, ref, ref_root);
		btrfs_set_ref_generation(leaf, ref, ref_generation);
		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
		btrfs_set_ref_offset(leaf, ref, owner_offset);
		btrfs_set_ref_num_refs(leaf, ref, 1);
	} else if (ret == -EEXIST) {
		u64 existing_owner;
		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		if (btrfs_ref_root(leaf, ref) != ref_root ||
		    btrfs_ref_generation(leaf, ref) != ref_generation) {
			ret = -EIO;
			WARN_ON(1);
			goto out;
		}

		num_refs = btrfs_ref_num_refs(leaf, ref);
		BUG_ON(num_refs == 0);
		btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);

		existing_owner = btrfs_ref_objectid(leaf, ref);
		if (existing_owner == owner_objectid &&
		    btrfs_ref_offset(leaf, ref) > owner_offset) {
			btrfs_set_ref_offset(leaf, ref, owner_offset);
		} else if (existing_owner != owner_objectid &&
			   existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
			btrfs_set_ref_objectid(leaf, ref,
					BTRFS_MULTIPLE_OBJECTIDS);
			btrfs_set_ref_offset(leaf, ref, 0);
		}
		ret = 0;
	} else {
		goto out;
	}
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
}

static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *ref;
	u32 num_refs;
	int ret = 0;

	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
	num_refs = btrfs_ref_num_refs(leaf, ref);
	BUG_ON(num_refs == 0);
	num_refs -= 1;
	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		btrfs_set_ref_num_refs(leaf, ref, num_refs);
		btrfs_mark_buffer_dirty(leaf);
	}
	btrfs_release_path(root, path);
	return ret;
}

static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 bytenr,
				     u64 orig_parent, u64 parent,
				     u64 orig_root, u64 ref_root,
				     u64 orig_generation, u64 ref_generation,
				     u64 owner_objectid, u64 owner_offset)
{
	int ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path *path;

	if (root == root->fs_info->extent_root) {
		struct pending_extent_op *extent_op;
		u64 num_bytes;

		BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
		num_bytes = btrfs_level_size(root, (int)owner_objectid);
		if (test_range_bit(&root->fs_info->extent_ins, bytenr,
				bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) {
			u64 priv;
			ret = get_state_private(&root->fs_info->extent_ins,
						bytenr, &priv);
			BUG_ON(ret);
			extent_op = (struct pending_extent_op *)
							(unsigned long)priv;
			BUG_ON(extent_op->parent != orig_parent);
			BUG_ON(extent_op->generation != orig_generation);
			extent_op->parent = parent;
			extent_op->generation = ref_generation;
		} else {
			extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
			BUG_ON(!extent_op);

			extent_op->type = PENDING_BACKREF_UPDATE;
			extent_op->bytenr = bytenr;
			extent_op->num_bytes = num_bytes;
			extent_op->parent = parent;
			extent_op->orig_parent = orig_parent;
			extent_op->generation = ref_generation;
			extent_op->orig_generation = orig_generation;
			extent_op->level = (int)owner_objectid;

			set_extent_bits(&root->fs_info->extent_ins,
					bytenr, bytenr + num_bytes - 1,
					EXTENT_LOCKED, GFP_NOFS);
			set_state_private(&root->fs_info->extent_ins,
					  bytenr, (unsigned long)extent_op);
		}
		return 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = lookup_extent_backref(trans, extent_root, path,
				    bytenr, orig_parent, orig_root,
				    orig_generation, 1);
	if (ret)
		goto out;
	ret = remove_extent_backref(trans, extent_root, path);
	if (ret)
		goto out;
	ret = insert_extent_backref(trans, extent_root, path, bytenr,
				    parent, ref_root, ref_generation,
				    owner_objectid, owner_offset);
	BUG_ON(ret);
	finish_current_insert(trans, extent_root);
	del_pending_extents(trans, extent_root);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 bytenr,
			    u64 orig_parent, u64 parent,
			    u64 ref_root, u64 ref_generation,
			    u64 owner_objectid, u64 owner_offset)
{
	int ret;
	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		return 0;
	maybe_lock_mutex(root);
	ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
					parent, ref_root, ref_root,
					ref_generation, ref_generation,
					owner_objectid, owner_offset);
	maybe_unlock_mutex(root);
	return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, u64 bytenr,
				  u64 orig_parent, u64 parent,
				  u64 orig_root, u64 ref_root,
				  u64 orig_generation, u64 ref_generation,
				  u64 owner_objectid, u64 owner_offset)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	u32 refs;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
				0, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret == 0 || path->slots[0] == 0);

	path->slots[0]--;
	l = path->nodes[0];

	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
	BUG_ON(key.objectid != bytenr);
	BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);

	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(l, item);
	btrfs_set_extent_refs(l, item, refs + 1);
	btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_release_path(root->fs_info->extent_root, path);

	path->reada = 1;
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent,
				    ref_root, ref_generation,
				    owner_objectid, owner_offset);
	BUG_ON(ret);
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);

	btrfs_free_path(path);
	return 0;
}

int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 ref_root, u64 ref_generation,
			 u64 owner_objectid, u64 owner_offset)
{
	int ret;
	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		return 0;
	maybe_lock_mutex(root);
	ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
				     0, ref_root, 0, ref_generation,
				     owner_objectid, owner_offset);
	maybe_unlock_mutex(root);
	return ret;
}

int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root)
{
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
	return 0;
}

int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 bytenr,
			    u64 num_bytes, u32 *refs)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;

	WARN_ON(num_bytes < root->sectorsize);
	path = btrfs_alloc_path();
	path->reada = 1;
	key.objectid = bytenr;
	key.offset = num_bytes;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
				0, 0);
	if (ret < 0)
		goto out;
	if (ret != 0) {
		btrfs_print_leaf(root, path->nodes[0]);
		printk("failed to find block number %Lu\n", bytenr);
		BUG();
	}
	l = path->nodes[0];
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	*refs = btrfs_extent_refs(l, item);
out:
	btrfs_free_path(path);
	return 0;
}

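/*
 * walk the backrefs of the extent at bytenr.  *ref_count ends up as 1 when
 * every backref found belongs to this root (and, for regular inodes,
 * matches ref_objectid), and as 2 as soon as a backref from another root,
 * an older generation, or a different inode is seen.  *min_generation is
 * the smallest ref generation encountered.
 */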
static int get_reference_status(struct btrfs_root *root, u64 bytenr,
				u64 parent_gen, u64 ref_objectid,
				u64 *min_generation, u32 *ref_count)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *ref_item;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 root_objectid = root->root_key.objectid;
	u64 ref_generation;
	u32 nritems;
	int ret;

	key.objectid = bytenr;
	key.offset = (u64)-1;
	key.type = BTRFS_EXTENT_ITEM_KEY;

	path = btrfs_alloc_path();
	mutex_lock(&root->fs_info->alloc_mutex);
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret == 0);
	if (ret < 0 || path->slots[0] == 0)
		goto out;

	path->slots[0]--;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

	if (found_key.objectid != bytenr ||
	    found_key.type != BTRFS_EXTENT_ITEM_KEY) {
		ret = 1;
		goto out;
	}

	*ref_count = 0;
	*min_generation = (u64)-1;

	while (1) {
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret == 0)
				continue;
			break;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != bytenr)
			break;

		if (found_key.type != BTRFS_EXTENT_REF_KEY) {
			path->slots[0]++;
			continue;
		}

		ref_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_extent_ref);
		ref_generation = btrfs_ref_generation(leaf, ref_item);
		/*
		 * For (parent_gen > 0 && parent_gen > ref_generation):
		 *
		 * we reach here through the oldest root, therefore
		 * all other references from the same snapshot should
		 * have a larger generation.
		 */
		if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
		    (parent_gen > 0 && parent_gen > ref_generation) ||
		    (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		     ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
			*ref_count = 2;
			break;
		}

		*ref_count = 1;
		if (*min_generation > ref_generation)
			*min_generation = ref_generation;

		path->slots[0]++;
	}
	ret = 0;
out:
	mutex_unlock(&root->fs_info->alloc_mutex);
	btrfs_free_path(path);
	return ret;
}
1029
1030 int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
1031                            struct btrfs_root *root,
1032                            struct btrfs_key *key, u64 bytenr)
1033 {
1034         struct btrfs_root *old_root;
1035         struct btrfs_path *path = NULL;
1036         struct extent_buffer *eb;
1037         struct btrfs_file_extent_item *item;
1038         u64 ref_generation;
1039         u64 min_generation;
1040         u64 extent_start;
1041         u32 ref_count;
1042         int level;
1043         int ret;
1044
1045         BUG_ON(trans == NULL);
1046         BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
1047         ret = get_reference_status(root, bytenr, 0, key->objectid,
1048                                    &min_generation, &ref_count);
1049         if (ret)
1050                 return ret;
1051
1052         if (ref_count != 1)
1053                 return 1;
1054
1055         old_root = root->dirty_root->root;
1056         ref_generation = old_root->root_key.offset;
1057
1058         /* all references are created in running transaction */
1059         if (min_generation > ref_generation) {
1060                 ret = 0;
1061                 goto out;
1062         }
1063
1064         path = btrfs_alloc_path();
1065         if (!path) {
1066                 ret = -ENOMEM;
1067                 goto out;
1068         }
1069
1070         path->skip_locking = 1;
1071         /* if no item found, the extent is referenced by other snapshot */
1072         ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
1073         if (ret)
1074                 goto out;
1075
1076         eb = path->nodes[0];
1077         item = btrfs_item_ptr(eb, path->slots[0],
1078                               struct btrfs_file_extent_item);
1079         if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
1080             btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
1081                 ret = 1;
1082                 goto out;
1083         }
1084
1085         for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
1086                 if (level >= 0) {
1087                         eb = path->nodes[level];
1088                         if (!eb)
1089                                 continue;
1090                         extent_start = eb->start;
1091                 } else
1092                         extent_start = bytenr;
1093
1094                 ret = get_reference_status(root, extent_start, ref_generation,
1095                                            0, &min_generation, &ref_count);
1096                 if (ret)
1097                         goto out;
1098
1099                 if (ref_count != 1) {
1100                         ret = 1;
1101                         goto out;
1102                 }
1103                 if (level >= 0)
1104                         ref_generation = btrfs_header_generation(eb);
1105         }
1106         ret = 0;
1107 out:
1108         if (path)
1109                 btrfs_free_path(path);
1110         return ret;
1111 }
1112
1113 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1114                     struct extent_buffer *buf, u32 nr_extents)
1115 {
1116         u32 nritems;
1117         struct btrfs_key key;
1118         struct btrfs_file_extent_item *fi;
1119         int i;
1120         int level;
1121         int ret = 0;
1122
1123         if (!root->ref_cows)
1124                 return 0;
1125
1126         level = btrfs_header_level(buf);
1127         nritems = btrfs_header_nritems(buf);
1128
1129         if (level == 0) {
1130                 struct btrfs_leaf_ref *ref;
1131                 struct btrfs_extent_info *info;
1132
1133                 ref = btrfs_alloc_leaf_ref(root, nr_extents);
1134                 if (!ref) {
1135                         ret = -ENOMEM;
1136                         goto out;
1137                 }
1138
1139                 ref->root_gen = root->root_key.offset;
1140                 ref->bytenr = buf->start;
1141                 ref->owner = btrfs_header_owner(buf);
1142                 ref->generation = btrfs_header_generation(buf);
1143                 ref->nritems = nr_extents;
1144                 info = ref->extents;
1145
1146                 for (i = 0; nr_extents > 0 && i < nritems; i++) {
1147                         u64 disk_bytenr;
1148                         btrfs_item_key_to_cpu(buf, &key, i);
1149                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1150                                 continue;
1151                         fi = btrfs_item_ptr(buf, i,
1152                                             struct btrfs_file_extent_item);
1153                         if (btrfs_file_extent_type(buf, fi) ==
1154                             BTRFS_FILE_EXTENT_INLINE)
1155                                 continue;
1156                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1157                         if (disk_bytenr == 0)
1158                                 continue;
1159
1160                         info->bytenr = disk_bytenr;
1161                         info->num_bytes =
1162                                 btrfs_file_extent_disk_num_bytes(buf, fi);
1163                         info->objectid = key.objectid;
1164                         info->offset = key.offset;
1165                         info++;
1166                 }
1167
1168                 BUG_ON(!root->ref_tree);
1169                 ret = btrfs_add_leaf_ref(root, ref);
1170                 WARN_ON(ret);
1171                 btrfs_free_leaf_ref(root, ref);
1172         }
1173 out:
1174         return ret;
1175 }
1176
1177 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1178                   struct extent_buffer *orig_buf, struct extent_buffer *buf,
1179                   u32 *nr_extents)
1180 {
1181         u64 bytenr;
1182         u64 ref_root;
1183         u64 orig_root;
1184         u64 ref_generation;
1185         u64 orig_generation;
1186         u32 nritems;
1187         u32 nr_file_extents = 0;
1188         struct btrfs_key key;
1189         struct btrfs_file_extent_item *fi;
1190         int i;
1191         int level;
1192         int ret = 0;
1193         int faili = 0;
1194         int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1195                             u64, u64, u64, u64, u64, u64, u64, u64, u64);
1196
1197         ref_root = btrfs_header_owner(buf);
1198         ref_generation = btrfs_header_generation(buf);
1199         orig_root = btrfs_header_owner(orig_buf);
1200         orig_generation = btrfs_header_generation(orig_buf);
1201
1202         nritems = btrfs_header_nritems(buf);
1203         level = btrfs_header_level(buf);
1204
1205         if (root->ref_cows) {
1206                 process_func = __btrfs_inc_extent_ref;
1207         } else {
1208                 if (level == 0 &&
1209                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1210                         goto out;
1211                 if (level != 0 &&
1212                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1213                         goto out;
1214                 process_func = __btrfs_update_extent_ref;
1215         }
1216
1217         for (i = 0; i < nritems; i++) {
1218                 cond_resched();
1219                 if (level == 0) {
1220                         btrfs_item_key_to_cpu(buf, &key, i);
1221                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1222                                 continue;
1223                         fi = btrfs_item_ptr(buf, i,
1224                                             struct btrfs_file_extent_item);
1225                         if (btrfs_file_extent_type(buf, fi) ==
1226                             BTRFS_FILE_EXTENT_INLINE)
1227                                 continue;
1228                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1229                         if (bytenr == 0)
1230                                 continue;
1231
1232                         nr_file_extents++;
1233
1234                         maybe_lock_mutex(root);
1235                         ret = process_func(trans, root, bytenr,
1236                                            orig_buf->start, buf->start,
1237                                            orig_root, ref_root,
1238                                            orig_generation, ref_generation,
1239                                            key.objectid, key.offset);
1240                         maybe_unlock_mutex(root);
1241
1242                         if (ret) {
1243                                 faili = i;
1244                                 WARN_ON(1);
1245                                 goto fail;
1246                         }
1247                 } else {
1248                         bytenr = btrfs_node_blockptr(buf, i);
1249                         maybe_lock_mutex(root);
1250                         ret = process_func(trans, root, bytenr,
1251                                            orig_buf->start, buf->start,
1252                                            orig_root, ref_root,
1253                                            orig_generation, ref_generation,
1254                                            level - 1, 0);
1255                         maybe_unlock_mutex(root);
1256                         if (ret) {
1257                                 faili = i;
1258                                 WARN_ON(1);
1259                                 goto fail;
1260                         }
1261                 }
1262         }
1263 out:
1264         if (nr_extents) {
1265                 if (level == 0)
1266                         *nr_extents = nr_file_extents;
1267                 else
1268                         *nr_extents = nritems;
1269         }
1270         return 0;
1271 fail:
1272         WARN_ON(1);
1273         return ret;
1274 }
1275
1276 int btrfs_update_ref(struct btrfs_trans_handle *trans,
1277                      struct btrfs_root *root, struct extent_buffer *orig_buf,
1278                      struct extent_buffer *buf, int start_slot, int nr)
1279
1280 {
1281         u64 bytenr;
1282         u64 ref_root;
1283         u64 orig_root;
1284         u64 ref_generation;
1285         u64 orig_generation;
1286         struct btrfs_key key;
1287         struct btrfs_file_extent_item *fi;
1288         int i;
1289         int ret;
1290         int slot;
1291         int level;
1292
1293         BUG_ON(start_slot < 0);
1294         BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1295
1296         ref_root = btrfs_header_owner(buf);
1297         ref_generation = btrfs_header_generation(buf);
1298         orig_root = btrfs_header_owner(orig_buf);
1299         orig_generation = btrfs_header_generation(orig_buf);
1300         level = btrfs_header_level(buf);
1301
1302         if (!root->ref_cows) {
1303                 if (level == 0 &&
1304                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1305                         return 0;
1306                 if (level != 0 &&
1307                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1308                         return 0;
1309         }
1310
1311         for (i = 0, slot = start_slot; i < nr; i++, slot++) {
1312                 cond_resched();
1313                 if (level == 0) {
1314                         btrfs_item_key_to_cpu(buf, &key, slot);
1315                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1316                                 continue;
1317                         fi = btrfs_item_ptr(buf, slot,
1318                                             struct btrfs_file_extent_item);
1319                         if (btrfs_file_extent_type(buf, fi) ==
1320                             BTRFS_FILE_EXTENT_INLINE)
1321                                 continue;
1322                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1323                         if (bytenr == 0)
1324                                 continue;
1325                         maybe_lock_mutex(root);
1326                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1327                                             orig_buf->start, buf->start,
1328                                             orig_root, ref_root,
1329                                             orig_generation, ref_generation,
1330                                             key.objectid, key.offset);
1331                         maybe_unlock_mutex(root);
1332                         if (ret)
1333                                 goto fail;
1334                 } else {
1335                         bytenr = btrfs_node_blockptr(buf, slot);
1336                         maybe_lock_mutex(root);
1337                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1338                                             orig_buf->start, buf->start,
1339                                             orig_root, ref_root,
1340                                             orig_generation, ref_generation,
1341                                             level - 1, 0);
1342                         maybe_unlock_mutex(root);
1343                         if (ret)
1344                                 goto fail;
1345                 }
1346         }
1347         return 0;
1348 fail:
1349         WARN_ON(1);
1350         return -1;
1351 }
1352
1353 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1354                                  struct btrfs_root *root,
1355                                  struct btrfs_path *path,
1356                                  struct btrfs_block_group_cache *cache)
1357 {
1358         int ret;
1359         int pending_ret;
1360         struct btrfs_root *extent_root = root->fs_info->extent_root;
1361         unsigned long bi;
1362         struct extent_buffer *leaf;
1363
1364         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1365         if (ret < 0)
1366                 goto fail;
1367         BUG_ON(ret);
1368
1369         leaf = path->nodes[0];
1370         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1371         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1372         btrfs_mark_buffer_dirty(leaf);
1373         btrfs_release_path(extent_root, path);
1374 fail:
1375         finish_current_insert(trans, extent_root);
1376         pending_ret = del_pending_extents(trans, extent_root);
1377         if (ret)
1378                 return ret;
1379         if (pending_ret)
1380                 return pending_ret;
1381         return 0;
1382
1383 }
1384
1385 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1386                                    struct btrfs_root *root)
1387 {
1388         struct btrfs_block_group_cache *cache, *entry;
1389         struct rb_node *n;
1390         int err = 0;
1391         int werr = 0;
1392         struct btrfs_path *path;
1393         u64 last = 0;
1394
1395         path = btrfs_alloc_path();
1396         if (!path)
1397                 return -ENOMEM;
1398
1399         mutex_lock(&root->fs_info->alloc_mutex);
1400         while(1) {
1401                 cache = NULL;
1402                 spin_lock(&root->fs_info->block_group_cache_lock);
1403                 for (n = rb_first(&root->fs_info->block_group_cache_tree);
1404                      n; n = rb_next(n)) {
1405                         entry = rb_entry(n, struct btrfs_block_group_cache,
1406                                          cache_node);
1407                         if (entry->dirty) {
1408                                 cache = entry;
1409                                 break;
1410                         }
1411                 }
1412                 spin_unlock(&root->fs_info->block_group_cache_lock);
1413
1414                 if (!cache)
1415                         break;
1416
1419                 err = write_one_cache_group(trans, root,
1420                                             path, cache);
1421                 /*
1422                  * if we fail to write the cache group, we want
1423                  * to keep it marked dirty in hopes that a later
1424                  * write will work
1425                  */
1426                 if (err) {
1427                         werr = err;
1428                         continue;
1429                 }
1430
1431                 cache->dirty = 0;
1432         }
1433         btrfs_free_path(path);
1434         mutex_unlock(&root->fs_info->alloc_mutex);
1435         return werr;
1436 }
1437
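/*
 * account total_bytes and bytes_used against the space info matching
 * flags, creating and initializing a new space info if none exists yet
 */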
1438 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1439                              u64 total_bytes, u64 bytes_used,
1440                              struct btrfs_space_info **space_info)
1441 {
1442         struct btrfs_space_info *found;
1443
1444         found = __find_space_info(info, flags);
1445         if (found) {
1446                 found->total_bytes += total_bytes;
1447                 found->bytes_used += bytes_used;
1448                 found->full = 0;
1449                 *space_info = found;
1450                 return 0;
1451         }
1452         found = kmalloc(sizeof(*found), GFP_NOFS);
1453         if (!found)
1454                 return -ENOMEM;
1455
1456         list_add(&found->list, &info->space_info);
1457         INIT_LIST_HEAD(&found->block_groups);
1458         spin_lock_init(&found->lock);
1459         found->flags = flags;
1460         found->total_bytes = total_bytes;
1461         found->bytes_used = bytes_used;
1462         found->bytes_pinned = 0;
1463         found->full = 0;
1464         found->force_alloc = 0;
1465         *space_info = found;
1466         return 0;
1467 }
1468
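/*
 * record which raid and dup profiles are available for data, metadata
 * and system block groups, based on the flags of a group we've seen
 */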
1469 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1470 {
1471         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1472                                    BTRFS_BLOCK_GROUP_RAID1 |
1473                                    BTRFS_BLOCK_GROUP_RAID10 |
1474                                    BTRFS_BLOCK_GROUP_DUP);
1475         if (extra_flags) {
1476                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1477                         fs_info->avail_data_alloc_bits |= extra_flags;
1478                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1479                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1480                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1481                         fs_info->avail_system_alloc_bits |= extra_flags;
1482         }
1483 }
1484
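/*
 * clear out raid and dup bits that can't be honored with the current
 * number of devices, and pick a single profile when several are set
 */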
1485 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1486 {
1487         u64 num_devices = root->fs_info->fs_devices->num_devices;
1488
1489         if (num_devices == 1)
1490                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1491         if (num_devices < 4)
1492                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1493
1494         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1495             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1496                       BTRFS_BLOCK_GROUP_RAID10))) {
1497                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1498         }
1499
1500         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1501             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1502                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1503         }
1504
1505         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1506             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1507              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1508              (flags & BTRFS_BLOCK_GROUP_DUP)))
1509                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1510         return flags;
1511 }
1512
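/*
 * allocate a new chunk for the given profile when forced to, or when
 * the space info is close to full, and record it as a new block group
 */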
1513 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1514                           struct btrfs_root *extent_root, u64 alloc_bytes,
1515                           u64 flags, int force)
1516 {
1517         struct btrfs_space_info *space_info;
1518         u64 thresh;
1519         u64 start;
1520         u64 num_bytes;
1521         int ret = 0;
1522
1523         flags = reduce_alloc_profile(extent_root, flags);
1524
1525         space_info = __find_space_info(extent_root->fs_info, flags);
1526         if (!space_info) {
1527                 ret = update_space_info(extent_root->fs_info, flags,
1528                                         0, 0, &space_info);
1529                 BUG_ON(ret);
1530         }
1531         BUG_ON(!space_info);
1532
1533         if (space_info->force_alloc) {
1534                 force = 1;
1535                 space_info->force_alloc = 0;
1536         }
1537         if (space_info->full)
1538                 goto out;
1539
1540         thresh = div_factor(space_info->total_bytes, 6);
1541         if (!force &&
1542            (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1543             thresh)
1544                 goto out;
1545
1546         mutex_lock(&extent_root->fs_info->chunk_mutex);
1547         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1548         if (ret == -ENOSPC) {
1549                 printk(KERN_ERR "space info full %Lu\n", flags);
1550                 space_info->full = 1;
1551                 goto out_unlock;
1552         }
1553         BUG_ON(ret);
1554
1555         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1556                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1557         BUG_ON(ret);
1558
1559 out_unlock:
1560         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1561 out:
1562         return ret;
1563 }
1564
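/*
 * walk the block groups covering [bytenr, bytenr + num_bytes) and move
 * their used counters up (alloc) or down (free), optionally returning
 * freed space to the free space cache
 */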
1565 static int update_block_group(struct btrfs_trans_handle *trans,
1566                               struct btrfs_root *root,
1567                               u64 bytenr, u64 num_bytes, int alloc,
1568                               int mark_free)
1569 {
1570         struct btrfs_block_group_cache *cache;
1571         struct btrfs_fs_info *info = root->fs_info;
1572         u64 total = num_bytes;
1573         u64 old_val;
1574         u64 byte_in_group;
1575
1576         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1577         while (total) {
1578                 cache = btrfs_lookup_block_group(info, bytenr);
1579                 if (!cache)
1580                         return -1;
1582                 byte_in_group = bytenr - cache->key.objectid;
1583                 WARN_ON(byte_in_group > cache->key.offset);
1584
1585                 spin_lock(&cache->lock);
1586                 cache->dirty = 1;
1587                 old_val = btrfs_block_group_used(&cache->item);
1588                 num_bytes = min(total, cache->key.offset - byte_in_group);
1589                 if (alloc) {
1590                         old_val += num_bytes;
1591                         cache->space_info->bytes_used += num_bytes;
1592                         btrfs_set_block_group_used(&cache->item, old_val);
1593                         spin_unlock(&cache->lock);
1594                 } else {
1595                         old_val -= num_bytes;
1596                         cache->space_info->bytes_used -= num_bytes;
1597                         btrfs_set_block_group_used(&cache->item, old_val);
1598                         spin_unlock(&cache->lock);
1599                         if (mark_free) {
1600                                 int ret;
1601                                 ret = btrfs_add_free_space(cache, bytenr,
1602                                                            num_bytes);
1603                                 if (ret)
1604                                         return -1;
1605                         }
1606                 }
1607                 total -= num_bytes;
1608                 bytenr += num_bytes;
1609         }
1610         return 0;
1611 }
1612
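/*
 * return the start of the first block group at or after search_start,
 * or 0 if there isn't one
 */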
1613 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1614 {
1615         struct btrfs_block_group_cache *cache;
1616
1617         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
1618         if (!cache)
1619                 return 0;
1620
1621         return cache->key.objectid;
1622 }
1623
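/*
 * pin or unpin [bytenr, bytenr + num) in the pinned_extents tree and
 * keep the per block group and per space info pinned counters in sync
 */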
1625 int btrfs_update_pinned_extents(struct btrfs_root *root,
1626                                 u64 bytenr, u64 num, int pin)
1627 {
1628         u64 len;
1629         struct btrfs_block_group_cache *cache;
1630         struct btrfs_fs_info *fs_info = root->fs_info;
1631
1632         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1633         if (pin) {
1634                 set_extent_dirty(&fs_info->pinned_extents,
1635                                 bytenr, bytenr + num - 1, GFP_NOFS);
1636         } else {
1637                 clear_extent_dirty(&fs_info->pinned_extents,
1638                                 bytenr, bytenr + num - 1, GFP_NOFS);
1639         }
1640         while (num > 0) {
1641                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1642                 if (!cache) {
1643                         u64 first = first_logical_byte(root, bytenr);
1644                         WARN_ON(first < bytenr);
1645                         len = min(first - bytenr, num);
1646                 } else {
1647                         len = min(num, cache->key.offset -
1648                                   (bytenr - cache->key.objectid));
1649                 }
1650                 if (pin) {
1651                         if (cache) {
1652                                 spin_lock(&cache->lock);
1653                                 cache->pinned += len;
1654                                 cache->space_info->bytes_pinned += len;
1655                                 spin_unlock(&cache->lock);
1656                         }
1657                         fs_info->total_pinned += len;
1658                 } else {
1659                         if (cache) {
1660                                 spin_lock(&cache->lock);
1661                                 cache->pinned -= len;
1662                                 cache->space_info->bytes_pinned -= len;
1663                                 spin_unlock(&cache->lock);
1664                         }
1665                         fs_info->total_pinned -= len;
1666                 }
1667                 bytenr += len;
1668                 num -= len;
1669         }
1670         return 0;
1671 }
1672
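/*
 * copy everything currently marked dirty in the pinned extent io tree
 * into the tree passed in by the caller
 */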
1673 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1674 {
1675         u64 last = 0;
1676         u64 start;
1677         u64 end;
1678         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1679         int ret;
1680
1681         while (1) {
1682                 ret = find_first_extent_bit(pinned_extents, last,
1683                                             &start, &end, EXTENT_DIRTY);
1684                 if (ret)
1685                         break;
1686                 set_extent_dirty(copy, start, end, GFP_NOFS);
1687                 last = end + 1;
1688         }
1689         return 0;
1690 }
1691
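/*
 * called as a transaction commit finishes: unpin every range in the
 * unpin tree and hand the space back to the free space cache
 */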
1692 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1693                                struct btrfs_root *root,
1694                                struct extent_io_tree *unpin)
1695 {
1696         u64 start;
1697         u64 end;
1698         int ret;
1699         struct btrfs_block_group_cache *cache;
1700
1701         mutex_lock(&root->fs_info->alloc_mutex);
1702         while (1) {
1703                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1704                                             EXTENT_DIRTY);
1705                 if (ret)
1706                         break;
1707                 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
1708                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1709                 cache = btrfs_lookup_block_group(root->fs_info, start);
1710                 if (cache->cached)
1711                         btrfs_add_free_space(cache, start, end - start + 1);
1712                 if (need_resched()) {
1713                         mutex_unlock(&root->fs_info->alloc_mutex);
1714                         cond_resched();
1715                         mutex_lock(&root->fs_info->alloc_mutex);
1716                 }
1717         }
1718         mutex_unlock(&root->fs_info->alloc_mutex);
1719         return 0;
1720 }
1721
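/*
 * process the pending extent inserts and backref updates that were
 * queued up in the extent_ins tree while the extent root itself was
 * being modified
 */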
1722 static int finish_current_insert(struct btrfs_trans_handle *trans,
1723                                  struct btrfs_root *extent_root)
1724 {
1725         u64 start;
1726         u64 end;
1727         u64 priv;
1728         struct btrfs_fs_info *info = extent_root->fs_info;
1729         struct btrfs_path *path;
1730         struct btrfs_extent_ref *ref;
1731         struct pending_extent_op *extent_op;
1732         struct btrfs_key key;
1733         struct btrfs_extent_item extent_item;
1734         int ret;
1735         int err = 0;
1736
1737         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1738         btrfs_set_stack_extent_refs(&extent_item, 1);
1739         path = btrfs_alloc_path();
        BUG_ON(!path);
1740
1741         while (1) {
1742                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1743                                             &end, EXTENT_LOCKED);
1744                 if (ret)
1745                         break;
1746
1747                 ret = get_state_private(&info->extent_ins, start, &priv);
1748                 BUG_ON(ret);
1749                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
1750
1751                 if (extent_op->type == PENDING_EXTENT_INSERT) {
1752                         key.objectid = start;
1753                         key.offset = end + 1 - start;
1754                         key.type = BTRFS_EXTENT_ITEM_KEY;
1755                         err = btrfs_insert_item(trans, extent_root, &key,
1756                                         &extent_item, sizeof(extent_item));
1757                         BUG_ON(err);
1758
1759                         clear_extent_bits(&info->extent_ins, start, end,
1760                                           EXTENT_LOCKED, GFP_NOFS);
1761
1762                         err = insert_extent_backref(trans, extent_root, path,
1763                                                 start, extent_op->parent,
1764                                                 extent_root->root_key.objectid,
1765                                                 extent_op->generation,
1766                                                 extent_op->level, 0);
1767                         BUG_ON(err);
1768                 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
1769                         err = lookup_extent_backref(trans, extent_root, path,
1770                                                 start, extent_op->orig_parent,
1771                                                 extent_root->root_key.objectid,
1772                                                 extent_op->orig_generation, 0);
1773                         BUG_ON(err);
1774
1775                         clear_extent_bits(&info->extent_ins, start, end,
1776                                           EXTENT_LOCKED, GFP_NOFS);
1777
1778                         key.objectid = start;
1779                         key.offset = extent_op->parent;
1780                         key.type = BTRFS_EXTENT_REF_KEY;
1781                         err = btrfs_set_item_key_safe(trans, extent_root, path,
1782                                                       &key);
1783                         BUG_ON(err);
1784                         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1785                                              struct btrfs_extent_ref);
1786                         btrfs_set_ref_generation(path->nodes[0], ref,
1787                                                  extent_op->generation);
1788                         btrfs_mark_buffer_dirty(path->nodes[0]);
1789                         btrfs_release_path(extent_root, path);
1790                 } else {
1791                         BUG();
1792                 }
1793                 kfree(extent_op);
1794
1795                 if (need_resched()) {
1796                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
1797                         cond_resched();
1798                         mutex_lock(&extent_root->fs_info->alloc_mutex);
1799                 }
1800         }
1801         btrfs_free_path(path);
1802         return 0;
1803 }
1804
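/*
 * pin an extent so it isn't reused before the transaction commits.
 * Returns 1 if a clean, unwritten tree block from this transaction
 * could be reused directly instead of being pinned.
 */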
1805 static int pin_down_bytes(struct btrfs_trans_handle *trans,
1806                           struct btrfs_root *root,
1807                           u64 bytenr, u64 num_bytes, int is_data)
1808 {
1810         struct extent_buffer *buf;
1811
1812         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1813         if (is_data)
1814                 goto pinit;
1815
1816         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1817         if (!buf)
1818                 goto pinit;
1819
1820         /* we can reuse a block if it hasn't been written
1821          * and it is from this transaction.  We can't
1822          * reuse anything from the tree log root because
1823          * it has tiny sub-transactions.
1824          */
1825         if (btrfs_buffer_uptodate(buf, 0) &&
1826             btrfs_try_tree_lock(buf)) {
1827                 u64 header_owner = btrfs_header_owner(buf);
1828                 u64 header_transid = btrfs_header_generation(buf);
1829                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
1830                     header_transid == trans->transid &&
1831                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
1832                         clean_tree_block(NULL, root, buf);
1833                         btrfs_tree_unlock(buf);
1834                         free_extent_buffer(buf);
1835                         return 1;
1836                 }
1837                 btrfs_tree_unlock(buf);
1838         }
1839         free_extent_buffer(buf);
1840 pinit:
1841         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
1842
1844         return 0;
1845 }
1846
1847 /*
1848  * remove an extent from the root, returns 0 on success
1849  */
1850 static int __free_extent(struct btrfs_trans_handle *trans,
1851                          struct btrfs_root *root,
1852                          u64 bytenr, u64 num_bytes, u64 parent,
1853                          u64 root_objectid, u64 ref_generation,
1854                          u64 owner_objectid, u64 owner_offset,
1855                          int pin, int mark_free)
1856 {
1857         struct btrfs_path *path;
1858         struct btrfs_key key;
1859         struct btrfs_fs_info *info = root->fs_info;
1860         struct btrfs_root *extent_root = info->extent_root;
1861         struct extent_buffer *leaf;
1862         int ret;
1863         int extent_slot = 0;
1864         int found_extent = 0;
1865         int num_to_del = 1;
1866         struct btrfs_extent_item *ei;
1867         u32 refs;
1868
1869         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1870         key.objectid = bytenr;
1871         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1872         key.offset = num_bytes;
1873         path = btrfs_alloc_path();
1874         if (!path)
1875                 return -ENOMEM;
1876
1877         path->reada = 1;
1878         ret = lookup_extent_backref(trans, extent_root, path, bytenr, parent,
1879                                     root_objectid, ref_generation, 1);
1880         if (ret == 0) {
1881                 struct btrfs_key found_key;
1882                 extent_slot = path->slots[0];
1883                 while (extent_slot > 0) {
1884                         extent_slot--;
1885                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1886                                               extent_slot);
1887                         if (found_key.objectid != bytenr)
1888                                 break;
1889                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1890                             found_key.offset == num_bytes) {
1891                                 found_extent = 1;
1892                                 break;
1893                         }
1894                         if (path->slots[0] - extent_slot > 5)
1895                                 break;
1896                 }
1897                 if (!found_extent) {
1898                         ret = remove_extent_backref(trans, extent_root, path);
1899                         BUG_ON(ret);
1900                         btrfs_release_path(extent_root, path);
1901                         ret = btrfs_search_slot(trans, extent_root,
1902                                                 &key, path, -1, 1);
1903                         BUG_ON(ret);
1904                         extent_slot = path->slots[0];
1905                 }
1906         } else {
1907                 btrfs_print_leaf(extent_root, path->nodes[0]);
1908                 WARN_ON(1);
1909                 printk(KERN_ERR "Unable to find ref byte nr %Lu root %Lu "
1910                        "gen %Lu owner %Lu offset %Lu\n", bytenr,
1911                        root_objectid, ref_generation, owner_objectid,
1912                        owner_offset);
1913         }
1914
1915         leaf = path->nodes[0];
1916         ei = btrfs_item_ptr(leaf, extent_slot,
1917                             struct btrfs_extent_item);
1918         refs = btrfs_extent_refs(leaf, ei);
1919         BUG_ON(refs == 0);
1920         refs -= 1;
1921         btrfs_set_extent_refs(leaf, ei, refs);
1922
1923         btrfs_mark_buffer_dirty(leaf);
1924
1925         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1926                 struct btrfs_extent_ref *ref;
1927                 ref = btrfs_item_ptr(leaf, path->slots[0],
1928                                      struct btrfs_extent_ref);
1929                 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
1930                 /* if the back ref and the extent are next to each other
1931                  * they get deleted below in one shot
1932                  */
1933                 path->slots[0] = extent_slot;
1934                 num_to_del = 2;
1935         } else if (found_extent) {
1936                 /* otherwise delete the extent back ref */
1937                 ret = remove_extent_backref(trans, extent_root, path);
1938                 BUG_ON(ret);
1939                 /* if refs are 0, we need to setup the path for deletion */
1940                 if (refs == 0) {
1941                         btrfs_release_path(extent_root, path);
1942                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1943                                                 -1, 1);
1944                         BUG_ON(ret);
1945                 }
1946         }
1947
1948         if (refs == 0) {
1949                 u64 super_used;
1950                 u64 root_used;
1951 #ifdef BIO_RW_DISCARD
1952                 u64 map_length = num_bytes;
1953                 struct btrfs_multi_bio *multi = NULL;
1954 #endif
1955
1956                 if (pin) {
1957                         ret = pin_down_bytes(trans, root, bytenr, num_bytes,
1958                                 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
1959                         if (ret > 0)
1960                                 mark_free = 1;
1961                         BUG_ON(ret < 0);
1962                 }
1963
1964                 /* block accounting for super block */
1965                 spin_lock_irq(&info->delalloc_lock);
1966                 super_used = btrfs_super_bytes_used(&info->super_copy);
1967                 btrfs_set_super_bytes_used(&info->super_copy,
1968                                            super_used - num_bytes);
1969                 spin_unlock_irq(&info->delalloc_lock);
1970
1971                 /* block accounting for root item */
1972                 root_used = btrfs_root_used(&root->root_item);
1973                 btrfs_set_root_used(&root->root_item,
1974                                            root_used - num_bytes);
1975                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1976                                       num_to_del);
1977                 BUG_ON(ret);
1978                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1979                                          mark_free);
1980                 BUG_ON(ret);
1981
1982 #ifdef BIO_RW_DISCARD
1983                 /* Tell the block device(s) that the sectors can be discarded */
1984                 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
1985                                       bytenr, &map_length, &multi, 0);
1986                 if (!ret) {
1987                         struct btrfs_bio_stripe *stripe = multi->stripes;
1988                         int i;
1989
1990                         if (map_length > num_bytes)
1991                                 map_length = num_bytes;
1992
1993                         for (i = 0; i < multi->num_stripes; i++, stripe++) {
1994                                 blkdev_issue_discard(stripe->dev->bdev,
1995                                                      stripe->physical >> 9,
1996                                                      map_length >> 9);
1997                         }
1998                         kfree(multi);
1999                 }
2000 #endif
2001         }
2002         btrfs_free_path(path);
2003         finish_current_insert(trans, extent_root);
2004         return ret;
2005 }
2006
2007 /*
2008  * find all the blocks marked as pending deletion in the pending_del
2009  * io tree, pin them and remove them from the extent tree
2010  */
2011 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
2012                                btrfs_root *extent_root)
2013 {
2014         int ret;
2015         int err = 0;
2016         int mark_free = 0;
2017         u64 start;
2018         u64 end;
2019         u64 priv;
2020         struct extent_io_tree *pending_del;
2021         struct extent_io_tree *extent_ins;
2022         struct pending_extent_op *extent_op;
2023
2024         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2025         extent_ins = &extent_root->fs_info->extent_ins;
2026         pending_del = &extent_root->fs_info->pending_del;
2027
2028         while (1) {
2029                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
2030                                             EXTENT_LOCKED);
2031                 if (ret)
2032                         break;
2033
2034                 ret = get_state_private(pending_del, start, &priv);
2035                 BUG_ON(ret);
2036                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2037
2038                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
2039                                   GFP_NOFS);
2040
2041                 ret = pin_down_bytes(trans, extent_root, start,
2042                                      end + 1 - start, 0);
2043                 mark_free = ret > 0;
2044                 if (!test_range_bit(extent_ins, start, end,
2045                                     EXTENT_LOCKED, 0)) {
2046 free_extent:
2047                         ret = __free_extent(trans, extent_root,
2048                                             start, end + 1 - start,
2049                                             extent_op->orig_parent,
2050                                             extent_root->root_key.objectid,
2051                                             extent_op->orig_generation,
2052                                             extent_op->level, 0, 0, mark_free);
2053                         kfree(extent_op);
2054                 } else {
2055                         kfree(extent_op);
2056                         ret = get_state_private(extent_ins, start, &priv);
2057                         BUG_ON(ret);
2058                         extent_op = (struct pending_extent_op *)
2059                                                         (unsigned long)priv;
2060
2061                         clear_extent_bits(extent_ins, start, end,
2062                                           EXTENT_LOCKED, GFP_NOFS);
2063
2064                         if (extent_op->type == PENDING_BACKREF_UPDATE)
2065                                 goto free_extent;
2066
2067                         ret = update_block_group(trans, extent_root, start,
2068                                                 end + 1 - start, 0, mark_free);
2069                         BUG_ON(ret);
2070                         kfree(extent_op);
2071                 }
2072                 if (ret)
2073                         err = ret;
2074
2075                 if (need_resched()) {
2076                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2077                         cond_resched();
2078                         mutex_lock(&extent_root->fs_info->alloc_mutex);
2079                 }
2080         }
2081         return err;
2082 }
2083
2084 /*
2085  * remove an extent from the root, returns 0 on success.  Deletions
2086  * against the extent root itself are deferred via the pending_del
 * tree; metadata and already committed data extents are pinned instead
 * of being freed right away.
 */
2087 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2088                                struct btrfs_root *root,
2089                                u64 bytenr, u64 num_bytes, u64 parent,
2090                                u64 root_objectid, u64 ref_generation,
2091                                u64 owner_objectid, u64 owner_offset, int pin)
2092 {
2093         struct btrfs_root *extent_root = root->fs_info->extent_root;
2094         int pending_ret;
2095         int ret;
2096
2097         WARN_ON(num_bytes < root->sectorsize);
2098         if (root == extent_root) {
2099                 struct pending_extent_op *extent_op;
2100
2101                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2102                 BUG_ON(!extent_op);
2103
2104                 extent_op->type = PENDING_EXTENT_DELETE;
2105                 extent_op->bytenr = bytenr;
2106                 extent_op->num_bytes = num_bytes;
2107                 extent_op->parent = parent;
2108                 extent_op->orig_parent = parent;
2109                 extent_op->generation = ref_generation;
2110                 extent_op->orig_generation = ref_generation;
2111                 extent_op->level = (int)owner_objectid;
2112
2113                 set_extent_bits(&root->fs_info->pending_del,
2114                                 bytenr, bytenr + num_bytes - 1,
2115                                 EXTENT_LOCKED, GFP_NOFS);
2116                 set_state_private(&root->fs_info->pending_del,
2117                                   bytenr, (unsigned long)extent_op);
2118                 return 0;
2119         }
2120         /* if metadata always pin */
2121         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2122                 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2123                         struct btrfs_block_group_cache *cache;
2124
2125                         /* btrfs_free_reserved_extent */
2126                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
2127                         BUG_ON(!cache);
2128                         btrfs_add_free_space(cache, bytenr, num_bytes);
2129                         return 0;
2130                 }
2131                 pin = 1;
2132         }
2133
2134         /* if data pin when any transaction has committed this */
2135         if (ref_generation != trans->transid)
2136                 pin = 1;
2137
2138         ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2139                             root_objectid, ref_generation, owner_objectid,
2140                             owner_offset, pin, pin == 0);
2141
2142         finish_current_insert(trans, root->fs_info->extent_root);
2143         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
2144         return ret ? ret : pending_ret;
2145 }
2146
2147 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2148                       struct btrfs_root *root,
2149                       u64 bytenr, u64 num_bytes, u64 parent,
2150                       u64 root_objectid, u64 ref_generation,
2151                       u64 owner_objectid, u64 owner_offset, int pin)
2152 {
2153         int ret;
2154
2155         maybe_lock_mutex(root);
2156         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
2157                                   root_objectid, ref_generation,
2158                                   owner_objectid, owner_offset, pin);
2159         maybe_unlock_mutex(root);
2160         return ret;
2161 }
2162
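/*
 * round val up to the next stripesize boundary
 */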
2163 static u64 stripe_align(struct btrfs_root *root, u64 val)
2164 {
2165         u64 mask = ((u64)root->stripesize - 1);
2166         u64 ret = (val + mask) & ~mask;
2167         return ret;
2168 }
2169
2170 /*
2171  * walks the btree of allocated extents and finds a hole of a given size.
2172  * The key ins is changed to record the hole:
2173  * ins->objectid == block start
2174  * ins->flags = BTRFS_EXTENT_ITEM_KEY
2175  * ins->offset == number of blocks
2176  * Any available blocks before search_start are skipped.
2177  */
2178 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2179                                      struct btrfs_root *orig_root,
2180                                      u64 num_bytes, u64 empty_size,
2181                                      u64 search_start, u64 search_end,
2182                                      u64 hint_byte, struct btrfs_key *ins,
2183                                      u64 exclude_start, u64 exclude_nr,
2184                                      int data)
2185 {
2186         int ret;
2187         u64 orig_search_start;
2188         struct btrfs_root *root = orig_root->fs_info->extent_root;
2189         struct btrfs_fs_info *info = root->fs_info;
2190         u64 total_needed = num_bytes;
2191         u64 *last_ptr = NULL;
2192         struct btrfs_block_group_cache *block_group;
2193         int chunk_alloc_done = 0;
2194         int empty_cluster = 2 * 1024 * 1024;
2195         int allowed_chunk_alloc = 0;
2196
2197         WARN_ON(num_bytes < root->sectorsize);
2198         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2199
2200         if (orig_root->ref_cows || empty_size)
2201                 allowed_chunk_alloc = 1;
2202
2203         if (data & BTRFS_BLOCK_GROUP_METADATA) {
2204                 last_ptr = &root->fs_info->last_alloc;
2205                 empty_cluster = 256 * 1024;
2206         }
2207
2208         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
2209                 last_ptr = &root->fs_info->last_data_alloc;
2210
2211         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2212                 last_ptr = &root->fs_info->last_log_alloc;
2213                 if (*last_ptr == 0 && root->fs_info->last_alloc) {
2214                         *last_ptr = root->fs_info->last_alloc + empty_cluster;
2215                 }
2216         }
2217
2218         if (last_ptr) {
2219                 if (*last_ptr)
2220                         hint_byte = *last_ptr;
2221                 else
2222                         empty_size += empty_cluster;
2223         }
2224
2225         search_start = max(search_start, first_logical_byte(root, 0));
2226         orig_search_start = search_start;
2227
2228         if (search_end == (u64)-1)
2229                 search_end = btrfs_super_total_bytes(&info->super_copy);
2230
2231         search_start = max(search_start, hint_byte);
2232         total_needed += empty_size;
2233
2234 new_group:
2235         block_group = btrfs_lookup_block_group(info, search_start);
2236
2237         /*
2238          * Ok this looks a little tricky, but it's really simple.  First if we
2239          * didn't find a block group obviously we want to start over.
2240          * Secondly, if the block group we found does not match the type we
2241          * need, and we have a last_ptr and it's not 0, chances are the last
2242          * allocation we made was at the end of the block group, so let's go
2243          * ahead and skip looking through the rest of the block groups and
2244          * start at the beginning.  This helps with metadata allocations,
2245          * since you are likely to have a bunch of data block groups to search
2246          * through first before you realize that you need to start over, so go
2247          * ahead and start over and save the time.
2248          */
2249         if (!block_group || (!block_group_bits(block_group, data) &&
2250                              last_ptr && *last_ptr)) {
2251                 if (search_start != orig_search_start) {
2252                         if (last_ptr && *last_ptr)
2253                                 *last_ptr = 0;
2254                         search_start = orig_search_start;
2255                         goto new_group;
2256                 } else if (!chunk_alloc_done && allowed_chunk_alloc) {
2257                         ret = do_chunk_alloc(trans, root,
2258                                              num_bytes + 2 * 1024 * 1024,
2259                                              data, 1);
2260                         if (ret < 0)
2261                                 goto error;
2266                         BUG_ON(ret);
2267                         chunk_alloc_done = 1;
2268                         search_start = orig_search_start;
2269                         goto new_group;
2270                 } else {
2271                         ret = -ENOSPC;
2272                         goto error;
2273                 }
2274         }
2275
2276         /*
2277          * this is going to search through all of the existing block groups it
2278          * can find, so if we don't find something we need to see if we can
2279          * allocate what we need.
2280          */
2281         ret = find_free_space(root, &block_group, &search_start,
2282                               total_needed, data);
2283         if (ret == -ENOSPC) {
2284                 /*
2285                  * instead of allocating, start at the original search start
2286                  * and see if there is something to be found; if not, then we
2287                  * allocate
2288                  */
2289                 if (search_start != orig_search_start) {
2290                         if (last_ptr && *last_ptr) {
2291                                 *last_ptr = 0;
2292                                 total_needed += empty_cluster;
2293                         }
2294                         search_start = orig_search_start;
2295                         goto new_group;
2296                 }
2297
2298                 /*
2299                  * we've already allocated, we're pretty screwed
2300                  */
2301                 if (chunk_alloc_done) {
2302                         goto error;
2303                 } else if (!allowed_chunk_alloc && block_group &&
2304                            block_group_bits(block_group, data)) {
2305                         block_group->space_info->force_alloc = 1;
2306                         goto error;
2307                 } else if (!allowed_chunk_alloc) {
2308                         goto error;
2309                 }
2310
2311                 ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024,
2312                                      data, 1);
2313                 if (ret < 0)
2314                         goto error;
2315
2316                 BUG_ON(ret);
2317                 chunk_alloc_done = 1;
2318                 if (block_group)
2319                         search_start = block_group->key.objectid +
2320                                 block_group->key.offset;
2321                 else
2322                         search_start = orig_search_start;
2323                 goto new_group;
2324         }
2325
2326         if (ret)
2327                 goto error;
2328
2329         search_start = stripe_align(root, search_start);
2330         ins->objectid = search_start;
2331         ins->offset = num_bytes;
2332
2333         if (ins->objectid + num_bytes >= search_end) {
2334                 search_start = orig_search_start;
2335                 if (chunk_alloc_done) {
2336                         ret = -ENOSPC;
2337                         goto error;
2338                 }
2339                 goto new_group;
2340         }
2341
2342         if (ins->objectid + num_bytes >
2343             block_group->key.objectid + block_group->key.offset) {
2344                 if (search_start == orig_search_start && chunk_alloc_done) {
2345                         ret = -ENOSPC;
2346                         goto error;
2347                 }
2348                 search_start = block_group->key.objectid +
2349                         block_group->key.offset;
2350                 goto new_group;
2351         }
2352
2353         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
2354             ins->objectid < exclude_start + exclude_nr)) {
2355                 search_start = exclude_start + exclude_nr;
2356                 goto new_group;
2357         }
2358
2359         if (!(data & BTRFS_BLOCK_GROUP_DATA))
2360                 trans->block_group = block_group;
2361
2362         ins->offset = num_bytes;
2363         if (last_ptr) {
2364                 *last_ptr = ins->objectid + ins->offset;
2365                 if (*last_ptr ==
2366                     btrfs_super_total_bytes(&root->fs_info->super_copy))
2367                         *last_ptr = 0;
2368         }
2369
2370         ret = 0;
2371 error:
2372         return ret;
2373 }
2374
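/*
 * print the free space counters for a space info and each of its
 * block groups, used when an allocation fails
 */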
2375 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2376 {
2377         struct btrfs_block_group_cache *cache;
2378         struct list_head *l;
2379
2380         printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
2381                info->total_bytes - info->bytes_used - info->bytes_pinned,
2382                (info->full) ? "" : "not ");
2383
2384         spin_lock(&info->lock);
2385         list_for_each(l, &info->block_groups) {
2386                 cache = list_entry(l, struct btrfs_block_group_cache, list);
2387                 spin_lock(&cache->lock);
2388                 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
2389                        "%Lu pinned\n",
2390                        cache->key.objectid, cache->key.offset,
2391                        btrfs_block_group_used(&cache->item), cache->pinned);
2392                 btrfs_dump_free_space(cache, bytes);
2393                 spin_unlock(&cache->lock);
2394         }
2395         spin_unlock(&info->lock);
2396 }
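
/*
 * try to reserve an extent of num_bytes, retrying with smaller sizes
 * down to min_alloc_size on -ENOSPC and allocating new chunks as
 * needed; the winning range is removed from the free space cache
 */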
2397 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2398                                   struct btrfs_root *root,
2399                                   u64 num_bytes, u64 min_alloc_size,
2400                                   u64 empty_size, u64 hint_byte,
2401                                   u64 search_end, struct btrfs_key *ins,
2402                                   u64 data)
2403 {
2404         int ret;
2405         u64 search_start = 0;
2406         u64 alloc_profile;
2407         struct btrfs_fs_info *info = root->fs_info;
2408         struct btrfs_block_group_cache *cache;
2409
2410         if (data) {
2411                 alloc_profile = info->avail_data_alloc_bits &
2412                                 info->data_alloc_profile;
2413                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2414         } else if (root == root->fs_info->chunk_root) {
2415                 alloc_profile = info->avail_system_alloc_bits &
2416                                 info->system_alloc_profile;
2417                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2418         } else {
2419                 alloc_profile = info->avail_metadata_alloc_bits &
2420                                 info->metadata_alloc_profile;
2421                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2422         }
2423 again:
2424         data = reduce_alloc_profile(root, data);
2425         /*
2426          * the only place that sets empty_size is btrfs_realloc_node, which
2427          * is not called recursively on allocations
2428          */
2429         if (empty_size || root->ref_cows) {
2430                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
2431                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2432                                      2 * 1024 * 1024,
2433                                      BTRFS_BLOCK_GROUP_METADATA |
2434                                      (info->metadata_alloc_profile &
2435                                       info->avail_metadata_alloc_bits), 0);
2436                 }
2437                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2438                                      num_bytes + 2 * 1024 * 1024, data, 0);
2439         }
2440
2441         WARN_ON(num_bytes < root->sectorsize);
2442         ret = find_free_extent(trans, root, num_bytes, empty_size,
2443                                search_start, search_end, hint_byte, ins,
2444                                trans->alloc_exclude_start,
2445                                trans->alloc_exclude_nr, data);
2446
2447         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
2448                 num_bytes = num_bytes >> 1;
2449                 num_bytes = num_bytes & ~(root->sectorsize - 1);
2450                 num_bytes = max(num_bytes, min_alloc_size);
2451                 do_chunk_alloc(trans, root->fs_info->extent_root,
2452                                num_bytes, data, 1);
2453                 goto again;
2454         }
2455         if (ret) {
2456                 struct btrfs_space_info *sinfo;
2457
2458                 sinfo = __find_space_info(root->fs_info, data);
2459                 printk(KERN_ERR "allocation failed flags %Lu, wanted %Lu\n",
2460                        data, num_bytes);
2461                 dump_space_info(sinfo, num_bytes);
2462                 BUG();
2463         }
2464         cache = btrfs_lookup_block_group(root->fs_info, ins->objectid);
2465         if (!cache) {
2466                 printk(KERN_ERR "Unable to find block group for %Lu\n",
                       ins->objectid);
2467                 return -ENOSPC;
2468         }
2469
2470         ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset);
2471
2472         return ret;
2473 }
2474
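/*
 * give a reserved but unused extent back to its block group's free
 * space cache
 */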
2475 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
2476 {
2477         struct btrfs_block_group_cache *cache;
2478
2479         maybe_lock_mutex(root);
2480         cache = btrfs_lookup_block_group(root->fs_info, start);
2481         if (!cache) {
2482                 printk(KERN_ERR "Unable to find block group for %Lu\n", start);
2483                 maybe_unlock_mutex(root);
2484                 return -ENOSPC;
2485         }
2486         btrfs_add_free_space(cache, start, len);
2487         maybe_unlock_mutex(root);
2488         return 0;
2489 }
2490
2491 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2492                                   struct btrfs_root *root,
2493                                   u64 num_bytes, u64 min_alloc_size,
2494                                   u64 empty_size, u64 hint_byte,
2495                                   u64 search_end, struct btrfs_key *ins,
2496                                   u64 data)
2497 {
2498         int ret;
2499         maybe_lock_mutex(root);
2500         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2501                                      empty_size, hint_byte, search_end, ins,
2502                                      data);
2503         maybe_unlock_mutex(root);
2504         return ret;
2505 }
2506
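/*
 * insert the extent item and backref for a reserved extent and update
 * the super block, root item and block group accounting.  Inserts
 * against the extent root itself are deferred via the extent_ins tree.
 */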
2507 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2508                                          struct btrfs_root *root, u64 parent,
2509                                          u64 root_objectid, u64 ref_generation,
2510                                          u64 owner, u64 owner_offset,
2511                                          struct btrfs_key *ins)
2512 {
2513         int ret;
2514         int pending_ret;
2515         u64 super_used;
2516         u64 root_used;
2517         u64 num_bytes = ins->offset;
2518         u32 sizes[2];
2519         struct btrfs_fs_info *info = root->fs_info;
2520         struct btrfs_root *extent_root = info->extent_root;
2521         struct btrfs_extent_item *extent_item;
2522         struct btrfs_extent_ref *ref;
2523         struct btrfs_path *path;
2524         struct btrfs_key keys[2];
2525
2526         if (parent == 0)
2527                 parent = ins->objectid;
2528
2529         /* block accounting for super block */
2530         spin_lock_irq(&info->delalloc_lock);
2531         super_used = btrfs_super_bytes_used(&info->super_copy);
2532         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2533         spin_unlock_irq(&info->delalloc_lock);
2534
2535         /* block accounting for root item */
2536         root_used = btrfs_root_used(&root->root_item);
2537         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
2538
2539         if (root == extent_root) {
2540                 struct pending_extent_op *extent_op;
2541
2542                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2543                 BUG_ON(!extent_op);
2544
2545                 extent_op->type = PENDING_EXTENT_INSERT;
2546                 extent_op->bytenr = ins->objectid;
2547                 extent_op->num_bytes = ins->offset;
2548                 extent_op->parent = parent;
2549                 extent_op->orig_parent = 0;
2550                 extent_op->generation = ref_generation;
2551                 extent_op->orig_generation = 0;
2552                 extent_op->level = (int)owner;
2553
2554                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2555                                 ins->objectid + ins->offset - 1,
2556                                 EXTENT_LOCKED, GFP_NOFS);
2557                 set_state_private(&root->fs_info->extent_ins,
2558                                   ins->objectid, (unsigned long)extent_op);
2559                 goto update_block;
2560         }
2561
2562         memcpy(&keys[0], ins, sizeof(*ins));
2563         keys[1].objectid = ins->objectid;
2564         keys[1].type = BTRFS_EXTENT_REF_KEY;
2565         keys[1].offset = parent;
2566         sizes[0] = sizeof(*extent_item);
2567         sizes[1] = sizeof(*ref);
2568
2569         path = btrfs_alloc_path();
2570         BUG_ON(!path);
2571
2572         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2573                                        sizes, 2);
2574         BUG_ON(ret);
2575
2576         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2577                                      struct btrfs_extent_item);
2578         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2579         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2580                              struct btrfs_extent_ref);
2581
2582         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2583         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2584         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2585         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
2586         btrfs_set_ref_num_refs(path->nodes[0], ref, 1);
2587
2588         btrfs_mark_buffer_dirty(path->nodes[0]);
2589
2590         trans->alloc_exclude_start = 0;
2591         trans->alloc_exclude_nr = 0;
2592         btrfs_free_path(path);
2593         finish_current_insert(trans, extent_root);
2594         pending_ret = del_pending_extents(trans, extent_root);
2595
2596         if (ret)
2597                 goto out;
2598         if (pending_ret) {
2599                 ret = pending_ret;
2600                 goto out;
2601         }
2602
2603 update_block:
2604         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
2605         if (ret) {
2606                 printk(KERN_ERR "update block group failed for %Lu %Lu\n",
2607                        ins->objectid, ins->offset);
2608                 BUG();
2609         }
2610 out:
2611         return ret;
2612 }
2613
2614 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2615                                 struct btrfs_root *root, u64 parent,
2616                                 u64 root_objectid, u64 ref_generation,
2617                                 u64 owner, u64 owner_offset,
2618                                 struct btrfs_key *ins)
2619 {
2620         int ret;
2621
2622         if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
2623                 return 0;
2624         maybe_lock_mutex(root);
2625         ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2626                                             root_objectid, ref_generation,
2627                                             owner, owner_offset, ins);
2628         maybe_unlock_mutex(root);
2629         return ret;
2630 }
2631
2632 /*
2633  * this is used by the tree logging recovery code.  It records that
2634  * an extent has been allocated and makes sure to clear the free
2635  * space cache bits as well
2636  */
2637 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
2638                                 struct btrfs_root *root, u64 parent,
2639                                 u64 root_objectid, u64 ref_generation,
2640                                 u64 owner, u64 owner_offset,
2641                                 struct btrfs_key *ins)
2642 {
2643         int ret;
2644         struct btrfs_block_group_cache *block_group;
2645
2646         maybe_lock_mutex(root);
2647         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
2648         cache_block_group(root, block_group);
2649
2650         ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset);
2651         BUG_ON(ret);
2652         ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2653                                             root_objectid, ref_generation,
2654                                             owner, owner_offset, ins);
2655         maybe_unlock_mutex(root);
2656         return ret;
2657 }
2658
2659 /*
2660  * finds a free extent and does all the dirty work required for allocation
2661  * returns the key for the extent through ins, and a tree buffer for
2662  * the first block of the extent through buf.
2663  *
2664  * returns 0 if everything worked, non-zero otherwise.
2665  */
2666 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2667                        struct btrfs_root *root,
2668                        u64 num_bytes, u64 parent, u64 min_alloc_size,
2669                        u64 root_objectid, u64 ref_generation,
2670                        u64 owner_objectid, u64 owner_offset,
2671                        u64 empty_size, u64 hint_byte,
2672                        u64 search_end, struct btrfs_key *ins, u64 data)
2673 {
2674         int ret;
2675
2676         maybe_lock_mutex(root);
2677
2678         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2679                                      min_alloc_size, empty_size, hint_byte,
2680                                      search_end, ins, data);
2681         BUG_ON(ret);
2682         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
2683                 ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2684                                         root_objectid, ref_generation,
2685                                         owner_objectid, owner_offset, ins);
2686                 BUG_ON(ret);
2688         }
2689         maybe_unlock_mutex(root);
2690         return ret;
2691 }
2692
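/*
 * set up a newly allocated tree block: record the generation, mark it
 * uptodate and add it to the dirty io tree that matches this
 * transaction (or the tree log)
 */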
2693 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
2694                                             struct btrfs_root *root,
2695                                             u64 bytenr, u32 blocksize)
2696 {
2697         struct extent_buffer *buf;
2698
2699         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
2700         if (!buf)
2701                 return ERR_PTR(-ENOMEM);
2702         btrfs_set_header_generation(buf, trans->transid);
2703         btrfs_tree_lock(buf);
2704         clean_tree_block(trans, root, buf);
2705         btrfs_set_buffer_uptodate(buf);
2706         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2707                 set_extent_dirty(&root->dirty_log_pages, buf->start,
2708                          buf->start + buf->len - 1, GFP_NOFS);
2709         } else {
2710                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2711                          buf->start + buf->len - 1, GFP_NOFS);
2712         }
2713         trans->blocks_used++;
2714         return buf;
2715 }
2716
2717 /*
2718  * helper function to allocate a block for a given tree
2719  * returns the tree buffer or an ERR_PTR on error.
2720  */
2721 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2722                                              struct btrfs_root *root,
2723                                              u32 blocksize, u64 parent,
2724                                              u64 root_objectid,
2725                                              u64 ref_generation,
2726                                              int level,
2727                                              u64 hint,
2728                                              u64 empty_size)
2729 {
2730         struct btrfs_key ins;
2731         int ret;
2732         struct extent_buffer *buf;
2733
2734         ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
2735                                  root_objectid, ref_generation, level, 0,
2736                                  empty_size, hint, (u64)-1, &ins, 0);
2737         if (ret) {
2738                 BUG_ON(ret > 0);
2739                 return ERR_PTR(ret);
2740         }
2741
2742         buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
2743         return buf;
2744 }
2745
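/*
 * drop one reference on every data extent a leaf points to.  the
 * alloc_mutex is taken around each __btrfs_free_extent call and
 * released between extents, and the transaction throttle is kicked
 * so long drops don't starve the rest of the filesystem.
 */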
2746 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
2747                         struct btrfs_root *root, struct extent_buffer *leaf)
2748 {
2749         u64 leaf_owner;
2750         u64 leaf_generation;
2751         struct btrfs_key key;
2752         struct btrfs_file_extent_item *fi;
2753         int i;
2754         int nritems;
2755         int ret;
2756
2757         BUG_ON(!btrfs_is_leaf(leaf));
2758         nritems = btrfs_header_nritems(leaf);
2759         leaf_owner = btrfs_header_owner(leaf);
2760         leaf_generation = btrfs_header_generation(leaf);
2761
2762         for (i = 0; i < nritems; i++) {
2763                 u64 disk_bytenr;
2764                 cond_resched();
2765
2766                 btrfs_item_key_to_cpu(leaf, &key, i);
2767                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2768                         continue;
2769                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2770                 if (btrfs_file_extent_type(leaf, fi) ==
2771                     BTRFS_FILE_EXTENT_INLINE)
2772                         continue;
2773                 /*
2774                  * FIXME make sure to insert a trans record that
2775                  * repeats the snapshot del on crash
2776                  */
2777                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2778                 if (disk_bytenr == 0)
2779                         continue;
2780
2781                 mutex_lock(&root->fs_info->alloc_mutex);
2782                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2783                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2784                                 leaf->start, leaf_owner, leaf_generation,
2785                                 key.objectid, key.offset, 0);
2786                 mutex_unlock(&root->fs_info->alloc_mutex);
2787                 BUG_ON(ret);
2788
2789                 atomic_inc(&root->fs_info->throttle_gen);
2790                 wake_up(&root->fs_info->transaction_throttle);
2791                 cond_resched();
2792         }
2793         return 0;
2794 }
2795
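/*
 * same as btrfs_drop_leaf_ref, but works from a cached btrfs_leaf_ref
 * record, so the refs can be dropped without reading the leaf itself
 * back from disk.
 */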
2796 static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
2797                                         struct btrfs_root *root,
2798                                         struct btrfs_leaf_ref *ref)
2799 {
2800         int i;
2801         int ret;
2802         struct btrfs_extent_info *info = ref->extents;
2803
2804         for (i = 0; i < ref->nritems; i++) {
2805                 mutex_lock(&root->fs_info->alloc_mutex);
2806                 ret = __btrfs_free_extent(trans, root, info->bytenr,
2807                                           info->num_bytes, ref->bytenr,
2808                                           ref->owner, ref->generation,
2809                                           info->objectid, info->offset, 0);
2810                 mutex_unlock(&root->fs_info->alloc_mutex);
2811
2812                 atomic_inc(&root->fs_info->throttle_gen);
2813                 wake_up(&root->fs_info->transaction_throttle);
2814                 cond_resched();
2815
2816                 BUG_ON(ret);
2817                 info++;
2818         }
2819
2820         return 0;
2821 }
2822
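/*
 * small wrapper around btrfs_lookup_extent_ref for the snapshot
 * dropping code.  the cond_resched matters here because walk_down_tree
 * calls this for every node and leaf it visits.
 */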
2823 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2824                               u32 *refs)
2825 {
2826         int ret;
2827
2828         ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
2829         BUG_ON(ret);
2830
2831 #if 0 /* some debugging code in case we see problems here */
2832         /* if the refs count is one, it won't get increased again.  But
2833          * if the ref count is > 1, someone may be decreasing it at
2834          * the same time we are.
2835          */
2836         if (*refs != 1) {
2837                 struct extent_buffer *eb = NULL;
2838                 eb = btrfs_find_create_tree_block(root, start, len);
2839                 if (eb)
2840                         btrfs_tree_lock(eb);
2841
2842                 mutex_lock(&root->fs_info->alloc_mutex);
2843                 ret = lookup_extent_ref(NULL, root, start, len, refs);
2844                 BUG_ON(ret);
2845                 mutex_unlock(&root->fs_info->alloc_mutex);
2846
2847                 if (eb) {
2848                         btrfs_tree_unlock(eb);
2849                         free_extent_buffer(eb);
2850                 }
2851                 if (*refs == 1) {
2852                         printk("block %llu went down to one during drop_snap\n",
2853                                (unsigned long long)start);
2854                 }
2855
2856         }
2857 #endif
2858
2859         cond_resched();
2860         return ret;
2861 }
2862
2863 /*
2864  * helper function for drop_snapshot, this walks down the tree dropping ref
2865  * counts as it goes.
2866  */
2867 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2868                                    struct btrfs_root *root,
2869                                    struct btrfs_path *path, int *level)
2870 {
2871         u64 root_owner;
2872         u64 root_gen;
2873         u64 bytenr;
2874         u64 ptr_gen;
2875         struct extent_buffer *next;
2876         struct extent_buffer *cur;
2877         struct extent_buffer *parent;
2878         struct btrfs_leaf_ref *ref;
2879         u32 blocksize;
2880         int ret;
2881         u32 refs;
2882
2883         WARN_ON(*level < 0);
2884         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2885         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
2886                                 path->nodes[*level]->len, &refs);
2887         BUG_ON(ret);
2888         if (refs > 1)
2889                 goto out;
2890
2891         /*
2892          * walk down to the last node level and free all the leaves
2893          */
2894         while(*level >= 0) {
2895                 WARN_ON(*level < 0);
2896                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2897                 cur = path->nodes[*level];
2898
2899                 if (btrfs_header_level(cur) != *level)
2900                         WARN_ON(1);
2901
2902                 if (path->slots[*level] >=
2903                     btrfs_header_nritems(cur))
2904                         break;
2905                 if (*level == 0) {
2906                         ret = btrfs_drop_leaf_ref(trans, root, cur);
2907                         BUG_ON(ret);
2908                         break;
2909                 }
2910                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2911                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2912                 blocksize = btrfs_level_size(root, *level - 1);
2913
2914                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
2915                 BUG_ON(ret);
2916                 if (refs != 1) {
2917                         parent = path->nodes[*level];
2918                         root_owner = btrfs_header_owner(parent);
2919                         root_gen = btrfs_header_generation(parent);
2920                         path->slots[*level]++;
2921
2922                         mutex_lock(&root->fs_info->alloc_mutex);
2923                         ret = __btrfs_free_extent(trans, root, bytenr,
2924                                                 blocksize, parent->start,
2925                                                 root_owner, root_gen, 0, 0, 1);
2926                         BUG_ON(ret);
2927                         mutex_unlock(&root->fs_info->alloc_mutex);
2928
2929                         atomic_inc(&root->fs_info->throttle_gen);
2930                         wake_up(&root->fs_info->transaction_throttle);
2931                         cond_resched();
2932
2933                         continue;
2934                 }
2935                 /*
2936                  * at this point, we have a single ref, and since the
2937                  * only place referencing this extent is a dead root,
2938                  * the reference count should never go higher.
2939                  * So, we don't need to check it again
2940                  */
2941                 if (*level == 1) {
2942                         ref = btrfs_lookup_leaf_ref(root, bytenr);
2943                         if (ref) {
2944                                 ret = cache_drop_leaf_ref(trans, root, ref);
2945                                 BUG_ON(ret);
2946                                 btrfs_remove_leaf_ref(root, ref);
2947                                 btrfs_free_leaf_ref(root, ref);
2948                                 *level = 0;
2949                                 break;
2950                         }
2951                         if (printk_ratelimit())
2952                                 printk("leaf ref miss for bytenr %llu\n",
2953                                        (unsigned long long)bytenr);
2954                 }
2955                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2956                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2957                         free_extent_buffer(next);
2958
2959                         next = read_tree_block(root, bytenr, blocksize,
2960                                                ptr_gen);
2961                         cond_resched();
2962 #if 0
2963                         /*
2964                          * this is a debugging check and can go away
2965                          * the ref should never go all the way down to 1
2966                          * at this point
2967                          */
2968                         ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2969                                                 &refs);
2970                         BUG_ON(ret);
2971                         WARN_ON(refs != 1);
2972 #endif
2973                 }
2974                 WARN_ON(*level <= 0);
2975                 if (path->nodes[*level-1])
2976                         free_extent_buffer(path->nodes[*level-1]);
2977                 path->nodes[*level-1] = next;
2978                 *level = btrfs_header_level(next);
2979                 path->slots[*level] = 0;
2980                 cond_resched();
2981         }
2982 out:
2983         WARN_ON(*level < 0);
2984         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2985
2986         if (path->nodes[*level] == root->node) {
2987                 parent = path->nodes[*level];
2988                 bytenr = path->nodes[*level]->start;
2989         } else {
2990                 parent = path->nodes[*level + 1];
2991                 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
2992         }
2993
2994         blocksize = btrfs_level_size(root, *level);
2995         root_owner = btrfs_header_owner(parent);
2996         root_gen = btrfs_header_generation(parent);
2997
2998         mutex_lock(&root->fs_info->alloc_mutex);
2999         ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
3000                                   parent->start, root_owner, root_gen,
3001                                   0, 0, 1);
3002         mutex_unlock(&root->fs_info->alloc_mutex);
3003         free_extent_buffer(path->nodes[*level]);
3004         path->nodes[*level] = NULL;
3005         *level += 1;
3006         BUG_ON(ret);
3007
3008         cond_resched();
3009         return 0;
3010 }
3011
3012 /*
3013  * helper for dropping snapshots.  This walks back up the tree in the path
3014  * to find the first node higher up where we haven't yet gone through
3015  * all the slots
3016  */
3017 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3018                                  struct btrfs_root *root,
3019                                  struct btrfs_path *path, int *level)
3020 {
3021         u64 root_owner;
3022         u64 root_gen;
3023         struct btrfs_root_item *root_item = &root->root_item;
3024         int i;
3025         int slot;
3026         int ret;
3027
3028         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
3029                 slot = path->slots[i];
3030                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3031                         struct extent_buffer *node;
3032                         struct btrfs_disk_key disk_key;
3033                         node = path->nodes[i];
3034                         path->slots[i]++;
3035                         *level = i;
3036                         WARN_ON(*level == 0);
3037                         btrfs_node_key(node, &disk_key, path->slots[i]);
3038                         memcpy(&root_item->drop_progress,
3039                                &disk_key, sizeof(disk_key));
3040                         root_item->drop_level = i;
3041                         return 0;
3042                 } else {
3043                         struct extent_buffer *parent;
3044                         if (path->nodes[*level] == root->node)
3045                                 parent = path->nodes[*level];
3046                         else
3047                                 parent = path->nodes[*level + 1];
3048
3049                         root_owner = btrfs_header_owner(parent);
3050                         root_gen = btrfs_header_generation(parent);
3051                         ret = btrfs_free_extent(trans, root,
3052                                                 path->nodes[*level]->start,
3053                                                 path->nodes[*level]->len,
3054                                                 parent->start,
3055                                                 root_owner, root_gen, 0, 0, 1);
3056                         BUG_ON(ret);
3057                         free_extent_buffer(path->nodes[*level]);
3058                         path->nodes[*level] = NULL;
3059                         *level = i + 1;
3060                 }
3061         }
3062         return 1;
3063 }
3064
3065 /*
3066  * drop the reference count on the tree rooted at 'root'.  This traverses
3067  * the tree freeing any blocks that have a ref count of zero after being
3068  * decremented.
3069  */
3070 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3071                         *root)
3072 {
3073         int ret = 0;
3074         int wret;
3075         int level;
3076         struct btrfs_path *path;
3077         int i;
3078         int orig_level;
3079         struct btrfs_root_item *root_item = &root->root_item;
3080
3081         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3082         path = btrfs_alloc_path();
3083         BUG_ON(!path);
3084
3085         level = btrfs_header_level(root->node);
3086         orig_level = level;
3087         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3088                 path->nodes[level] = root->node;
3089                 extent_buffer_get(root->node);
3090                 path->slots[level] = 0;
3091         } else {
3092                 struct btrfs_key key;
3093                 struct btrfs_disk_key found_key;
3094                 struct extent_buffer *node;
3095
3096                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3097                 level = root_item->drop_level;
3098                 path->lowest_level = level;
3099                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3100                 if (wret < 0) {
3101                         ret = wret;
3102                         goto out;
3103                 }
3104                 node = path->nodes[level];
3105                 btrfs_node_key(node, &found_key, path->slots[level]);
3106                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3107                                sizeof(found_key)));
3108                 /*
3109                  * unlock our path, this is safe because only this
3110                  * function is allowed to delete this snapshot
3111                  */
3112                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
3113                         if (path->nodes[i] && path->locks[i]) {
3114                                 path->locks[i] = 0;
3115                                 btrfs_tree_unlock(path->nodes[i]);
3116                         }
3117                 }
3118         }
3119         while(1) {
3120                 wret = walk_down_tree(trans, root, path, &level);
3121                 if (wret > 0)
3122                         break;
3123                 if (wret < 0)
3124                         ret = wret;
3125
3126                 wret = walk_up_tree(trans, root, path, &level);
3127                 if (wret > 0)
3128                         break;
3129                 if (wret < 0)
3130                         ret = wret;
3131                 if (trans->transaction->in_commit) {
3132                         ret = -EAGAIN;
3133                         break;
3134                 }
3135                 atomic_inc(&root->fs_info->throttle_gen);
3136                 wake_up(&root->fs_info->transaction_throttle);
3137         }
3138         for (i = 0; i <= orig_level; i++) {
3139                 if (path->nodes[i]) {
3140                         free_extent_buffer(path->nodes[i]);
3141                         path->nodes[i] = NULL;
3142                 }
3143         }
3144 out:
3145         btrfs_free_path(path);
3146         return ret;
3147 }
3148
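/*
 * tear down the in-memory block group cache, called during unmount.
 * btrfs_remove_free_space_cache may sleep, so block_group_cache_lock
 * (a spinlock) is dropped around that call and re-taken before the
 * rb tree and the space_info lists are touched again.
 */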
3149 int btrfs_free_block_groups(struct btrfs_fs_info *info)
3150 {
3151         struct btrfs_block_group_cache *block_group;
3152         struct rb_node *n;
3153
3154         mutex_lock(&info->alloc_mutex);
3155         spin_lock(&info->block_group_cache_lock);
3156         while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
3157                 block_group = rb_entry(n, struct btrfs_block_group_cache,
3158                                        cache_node);
3159
3160                 spin_unlock(&info->block_group_cache_lock);
3161                 btrfs_remove_free_space_cache(block_group);
3162                 spin_lock(&info->block_group_cache_lock);
3163
3164                 rb_erase(&block_group->cache_node,
3165                          &info->block_group_cache_tree);
3166
3167                 spin_lock(&block_group->space_info->lock);
3168                 list_del(&block_group->list);
3169                 spin_unlock(&block_group->space_info->lock);
3170                 kfree(block_group);
3171         }
3172         spin_unlock(&info->block_group_cache_lock);
3173         mutex_unlock(&info->alloc_mutex);
3174         return 0;
3175 }
3176
3177 static unsigned long calc_ra(unsigned long start, unsigned long last,
3178                              unsigned long nr)
3179 {
3180         return min(last, start + nr - 1);
3181 }
3182
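/*
 * pull every page in the range into the page cache and mark it
 * delalloc + dirty again, so writeback sends the data back through the
 * allocator and (with the old block group marked read only) it should
 * land somewhere else.  a page is retried if an ordered extent covers
 * it, and the whole walk bails out if a truncate races with us.
 */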
3183 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
3184                                          u64 len)
3185 {
3186         u64 page_start;
3187         u64 page_end;
3188         unsigned long last_index;
3189         unsigned long i;
3190         struct page *page;
3191         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3192         struct file_ra_state *ra;
3193         unsigned long total_read = 0;
3194         unsigned long ra_pages;
3195         struct btrfs_ordered_extent *ordered;
3196         struct btrfs_trans_handle *trans;
3197
3198         ra = kzalloc(sizeof(*ra), GFP_NOFS);
             if (!ra)
                     return -ENOMEM;
3199
3200         mutex_lock(&inode->i_mutex);
3201         i = start >> PAGE_CACHE_SHIFT;
3202         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3203
3204         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
3205
3206         file_ra_state_init(ra, inode->i_mapping);
3207
3208         for (; i <= last_index; i++) {
3209                 if (total_read % ra_pages == 0) {
3210                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3211                                        calc_ra(i, last_index, ra_pages));
3212                 }
3213                 total_read++;
3214 again:
3215                 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
3216                         goto truncate_racing;
3217                 page = grab_cache_page(inode->i_mapping, i);
3218                 if (!page) {
3219                         goto out_unlock;
3220                 }
3221                 if (!PageUptodate(page)) {
3222                         btrfs_readpage(NULL, page);
3223                         lock_page(page);
3224                         if (!PageUptodate(page)) {
3225                                 unlock_page(page);
3226                                 page_cache_release(page);
3227                                 goto out_unlock;
3228                         }
3229                 }
3230                 wait_on_page_writeback(page);
3231
3232                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3233                 page_end = page_start + PAGE_CACHE_SIZE - 1;
3234                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3235
3236                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3237                 if (ordered) {
3238                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3239                         unlock_page(page);
3240                         page_cache_release(page);
3241                         btrfs_start_ordered_extent(inode, ordered, 1);
3242                         btrfs_put_ordered_extent(ordered);
3243                         goto again;
3244                 }
3245                 set_page_extent_mapped(page);
3246
3247                 /*
3248                  * make sure page_mkwrite is called for this page if userland
3249                  * wants to change it from mmap
3250                  */
3251                 clear_page_dirty_for_io(page);
3252
3253                 btrfs_set_extent_delalloc(inode, page_start, page_end);
3254                 set_page_dirty(page);
3255
3256                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3257                 unlock_page(page);
3258                 page_cache_release(page);
3259         }
3260
3261 out_unlock:
3262         /* we have to start the IO in order to get the ordered extents
3263          * instantiated.  This allows the relocation to code to wait
3264          * for all the ordered extents to hit the disk.
3265          *
3266          * Otherwise, it would constantly loop over the same extents
3267          * because the old ones don't get deleted  until the IO is
3268          * started
3269          */
3270         btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1,
3271                                WB_SYNC_NONE);
3272         kfree(ra);
3273         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
3274         if (trans) {
3275                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
3276                 mark_inode_dirty(inode);
3277         }
3278         mutex_unlock(&inode->i_mutex);
3279         return 0;
3280
3281 truncate_racing:
3282         vmtruncate(inode, inode->i_size);
3283         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
3284                                            total_read);
3285         goto out_unlock;
3286 }
3287
3288 /*
3289  * The back references tell us which tree holds a ref on a block,
3290  * but it is possible for the tree root field in the reference to
3291  * reflect the original root before a snapshot was made.  In this
3292  * case we should search through all the children of a given root
3293  * to find potential holders of references on a block.
3294  *
3295  * Instead, we do something a little less fancy and just search
3296  * all the roots for a given key/block combination.
3297  */
3298 static int find_root_for_ref(struct btrfs_root *root,
3299                              struct btrfs_path *path,
3300                              struct btrfs_key *key0,
3301                              int level,
3302                              int file_key,
3303                              struct btrfs_root **found_root,
3304                              u64 bytenr)
3305 {
3306         struct btrfs_key root_location;
3307         struct btrfs_root *cur_root = *found_root;
3308         struct btrfs_file_extent_item *file_extent;
3309         u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
3310         u64 found_bytenr;
3311         int ret;
3312
3313         root_location.offset = (u64)-1;
3314         root_location.type = BTRFS_ROOT_ITEM_KEY;
3315         path->lowest_level = level;
3316         path->reada = 0;
3317         while(1) {
3318                 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
3319                 found_bytenr = 0;
3320                 if (ret == 0 && file_key) {
3321                         struct extent_buffer *leaf = path->nodes[0];
3322                         file_extent = btrfs_item_ptr(leaf, path->slots[0],
3323                                              struct btrfs_file_extent_item);
3324                         if (btrfs_file_extent_type(leaf, file_extent) ==
3325                             BTRFS_FILE_EXTENT_REG) {
3326                                 found_bytenr =
3327                                         btrfs_file_extent_disk_bytenr(leaf,
3328                                                                file_extent);
3329                         }
3330                 } else if (!file_key) {
3331                         if (path->nodes[level])
3332                                 found_bytenr = path->nodes[level]->start;
3333                 }
3334
3335                 btrfs_release_path(cur_root, path);
3336
3337                 if (found_bytenr == bytenr) {
3338                         *found_root = cur_root;
3339                         ret = 0;
3340                         goto out;
3341                 }
3342                 ret = btrfs_search_root(root->fs_info->tree_root,
3343                                         root_search_start, &root_search_start);
3344                 if (ret)
3345                         break;
3346
3347                 root_location.objectid = root_search_start;
3348                 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
3349                                                       &root_location);
3350                 if (!cur_root) {
3351                         ret = 1;
3352                         break;
3353                 }
3354         }
3355 out:
3356         path->lowest_level = 0;
3357         return ret;
3358 }
3359
3360 /*
3361  * note, this releases the path
3362  */
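/*
 * the alloc_mutex is held on entry (see the WARN_ON below); it is
 * dropped while inodes are loaded and transactions run, and re-taken
 * before returning, so the caller's locking state is unchanged.
 */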
3363 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
3364                                   struct btrfs_path *path,
3365                                   struct btrfs_key *extent_key,
3366                                   u64 *last_file_objectid,
3367                                   u64 *last_file_offset,
3368                                   u64 *last_file_root,
3369                                   u64 last_extent)
3370 {
3371         struct inode *inode;
3372         struct btrfs_root *found_root;
3373         struct btrfs_key root_location;
3374         struct btrfs_key found_key;
3375         struct btrfs_extent_ref *ref;
3376         u64 ref_root;
3377         u64 ref_gen;
3378         u64 ref_objectid;
3379         u64 ref_offset;
3380         int ret;
3381         int level;
3382
3383         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
3384
3385         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
3386                              struct btrfs_extent_ref);
3387         ref_root = btrfs_ref_root(path->nodes[0], ref);
3388         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
3389         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
3390         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
3391         btrfs_release_path(extent_root, path);
3392
3393         root_location.objectid = ref_root;
3394         if (ref_gen == 0)
3395                 root_location.offset = 0;
3396         else
3397                 root_location.offset = (u64)-1;
3398         root_location.type = BTRFS_ROOT_ITEM_KEY;
3399
3400         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
3401                                                 &root_location);
3402         BUG_ON(!found_root);
3403         mutex_unlock(&extent_root->fs_info->alloc_mutex);
3404
3405         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
3406                 found_key.objectid = ref_objectid;
3407                 found_key.type = BTRFS_EXTENT_DATA_KEY;
3408                 found_key.offset = ref_offset;
3409                 level = 0;
3410
3411                 if (last_extent == extent_key->objectid &&
3412                     *last_file_objectid == ref_objectid &&
3413                     *last_file_offset == ref_offset &&
3414                     *last_file_root == ref_root)
3415                         goto out;
3416
3417                 ret = find_root_for_ref(extent_root, path, &found_key,
3418                                         level, 1, &found_root,
3419                                         extent_key->objectid);
3420
3421                 if (ret)
3422                         goto out;
3423
3424                 if (last_extent == extent_key->objectid &&
3425                     *last_file_objectid == ref_objectid &&
3426                     *last_file_offset == ref_offset &&
3427                     *last_file_root == ref_root)
3428                         goto out;
3429
3430                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
3431                                           ref_objectid, found_root);
                 /* btrfs_iget_locked returns NULL when it can't
                  * allocate an inode
                  */
                 if (!inode)
                         goto out;
3432                 if (inode->i_state & I_NEW) {
3433                         /* the inode and parent dir are two different roots */
3434                         BTRFS_I(inode)->root = found_root;
3435                         BTRFS_I(inode)->location.objectid = ref_objectid;
3436                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
3437                         BTRFS_I(inode)->location.offset = 0;
3438                         btrfs_read_locked_inode(inode);
3439                         unlock_new_inode(inode);
3440
3441                 }
3442                 /* this can happen if the reference is not against
3443                  * the latest version of the tree root
3444                  */
3445                 if (is_bad_inode(inode))
3446                         goto out;
3447
3448                 *last_file_objectid = inode->i_ino;
3449                 *last_file_root = found_root->root_key.objectid;
3450                 *last_file_offset = ref_offset;
3451
3452                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
3453                 iput(inode);
3454         } else {
3455                 struct btrfs_trans_handle *trans;
3456                 struct extent_buffer *eb;
3457                 int needs_lock = 0;
3458
3459                 eb = read_tree_block(found_root, extent_key->objectid,
3460                                      extent_key->offset, 0);
3461                 btrfs_tree_lock(eb);
3462                 level = btrfs_header_level(eb);
3463
3464                 if (level == 0)
3465                         btrfs_item_key_to_cpu(eb, &found_key, 0);
3466                 else
3467                         btrfs_node_key_to_cpu(eb, &found_key, 0);
3468
3469                 btrfs_tree_unlock(eb);
3470                 free_extent_buffer(eb);
3471
3472                 ret = find_root_for_ref(extent_root, path, &found_key,
3473                                         level, 0, &found_root,
3474                                         extent_key->objectid);
3475
3476                 if (ret)
3477                         goto out;
3478
3479                 /*
3480                  * right here almost anything could happen to our key,
3481                  * but that's ok.  The COW below will either relocate it
3482                  * or someone else will have relocated it.  Either way,
3483                  * it is in a different spot than it was before and
3484                  * we're happy.
3485                  */
3486
3487                 trans = btrfs_start_transaction(found_root, 1);
3488
3489                 if (found_root == extent_root->fs_info->extent_root ||
3490                     found_root == extent_root->fs_info->chunk_root ||
3491                     found_root == extent_root->fs_info->dev_root) {
3492                         needs_lock = 1;
3493                         mutex_lock(&extent_root->fs_info->alloc_mutex);
3494                 }
3495
3496                 path->lowest_level = level;
3497                 path->reada = 2;
3498                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
3499                                         0, 1);
3500                 path->lowest_level = 0;
3501                 btrfs_release_path(found_root, path);
3502
3503                 if (found_root == found_root->fs_info->extent_root)
3504                         btrfs_extent_post_op(trans, found_root);
3505                 if (needs_lock)
3506                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
3507
3508                 btrfs_end_transaction(trans, found_root);
3509
3510         }
3511 out:
3512         mutex_lock(&extent_root->fs_info->alloc_mutex);
3513         return 0;
3514 }
3515
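/*
 * extent records at objectid 0 can't be relocated in any useful way;
 * relocate_one_extent calls this to just delete the item from the
 * extent tree instead.
 */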
3516 static int noinline del_extent_zero(struct btrfs_root *extent_root,
3517                                     struct btrfs_path *path,
3518                                     struct btrfs_key *extent_key)
3519 {
3520         int ret;
3521         struct btrfs_trans_handle *trans;
3522
3523         trans = btrfs_start_transaction(extent_root, 1);
3524         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
3525         if (ret > 0) {
3526                 ret = -EIO;
3527                 goto out;
3528         }
3529         if (ret < 0)
3530                 goto out;
3531         ret = btrfs_del_item(trans, extent_root, path);
3532 out:
3533         btrfs_end_transaction(trans, extent_root);
3534         return ret;
3535 }
3536
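/*
 * relocate all the references to one extent: walk the
 * BTRFS_EXTENT_REF_KEY items that follow the extent item and hand each
 * one to relocate_one_reference.  the last_file_* values let repeated
 * refs against the same file range be skipped.
 */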
3537 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
3538                                         struct btrfs_path *path,
3539                                         struct btrfs_key *extent_key)
3540 {
3541         struct btrfs_key key;
3542         struct btrfs_key found_key;
3543         struct extent_buffer *leaf;
3544         u64 last_file_objectid = 0;
3545         u64 last_file_root = 0;
3546         u64 last_file_offset = (u64)-1;
3547         u64 last_extent = 0;
3548         u32 nritems;
3549         u32 item_size;
3550         int ret = 0;
3551
3552         if (extent_key->objectid == 0) {
3553                 ret = del_extent_zero(extent_root, path, extent_key);
3554                 goto out;
3555         }
3556         key.objectid = extent_key->objectid;
3557         key.type = BTRFS_EXTENT_REF_KEY;
3558         key.offset = 0;
3559
3560         while(1) {
3561                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3562
3563                 if (ret < 0)
3564                         goto out;
3565
3566                 ret = 0;
3567                 leaf = path->nodes[0];
3568                 nritems = btrfs_header_nritems(leaf);
3569                 if (path->slots[0] == nritems) {
3570                         ret = btrfs_next_leaf(extent_root, path);
3571                         if (ret > 0) {
3572                                 ret = 0;
3573                                 goto out;
3574                         }
3575                         if (ret < 0)
3576                                 goto out;
3577                         leaf = path->nodes[0];
3578                 }
3579
3580                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3581                 if (found_key.objectid != extent_key->objectid) {
3582                         break;
3583                 }
3584
3585                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
3586                         break;
3587                 }
3588
3589                 key.offset = found_key.offset + 1;
3590                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3591
3592                 ret = relocate_one_reference(extent_root, path, extent_key,
3593                                              &last_file_objectid,
3594                                              &last_file_offset,
3595                                              &last_file_root, last_extent);
3596                 if (ret)
3597                         goto out;
3598                 last_extent = extent_key->objectid;
3599         }
3600         ret = 0;
3601 out:
3602         btrfs_release_path(extent_root, path);
3603         return ret;
3604 }
3605
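/*
 * pick new raid flags for a block group that is being rewritten.  with
 * a single device left, striping and mirroring make no sense, so raid0
 * drops to plain single-device chunks and raid1/raid10 drop to dup.
 * with multiple devices, dup is promoted to raid1 and single chunks to
 * raid0.  for example, DATA|RAID1 on a one-device fs comes back as
 * DATA|DUP.
 */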
3606 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
3607 {
3608         u64 num_devices;
3609         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
3610                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
3611
3612         num_devices = root->fs_info->fs_devices->num_devices;
3613         if (num_devices == 1) {
3614                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3615                 stripped = flags & ~stripped;
3616
3617                 /* turn raid0 into single device chunks */
3618                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
3619                         return stripped;
3620
3621                 /* turn mirroring into duplication */
3622                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3623                              BTRFS_BLOCK_GROUP_RAID10))
3624                         return stripped | BTRFS_BLOCK_GROUP_DUP;
3625                 return flags;
3626         } else {
3627                 /* they already had raid on here, just return */
3628                 if (flags & stripped)
3629                         return flags;
3630
3631                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3632                 stripped = flags & ~stripped;
3633
3634                 /* switch duplicated blocks with raid1 */
3635                 if (flags & BTRFS_BLOCK_GROUP_DUP)
3636                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
3637
3638                 /* turn single device chunks into raid0 */
3639                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
3640         }
3641         return flags;
3642 }
3643
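/*
 * make sure a new chunk is available before a block group is emptied,
 * so the data being relocated has somewhere to land.  note the lock
 * ordering: alloc_mutex is dropped before btrfs_start_transaction and
 * re-taken afterwards, since starting a transaction can block.
 */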
3644 int __alloc_chunk_for_shrink(struct btrfs_root *root,
3645                      struct btrfs_block_group_cache *shrink_block_group,
3646                      int force)
3647 {
3648         struct btrfs_trans_handle *trans;
3649         u64 new_alloc_flags;
3650         u64 calc;
3651
3652         spin_lock(&shrink_block_group->lock);
3653         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
3654                 spin_unlock(&shrink_block_group->lock);
3655                 mutex_unlock(&root->fs_info->alloc_mutex);
3656
3657                 trans = btrfs_start_transaction(root, 1);
3658                 mutex_lock(&root->fs_info->alloc_mutex);
3659                 spin_lock(&shrink_block_group->lock);
3660
3661                 new_alloc_flags = update_block_group_flags(root,
3662                                                    shrink_block_group->flags);
3663                 if (new_alloc_flags != shrink_block_group->flags) {
3664                         calc =
3665                              btrfs_block_group_used(&shrink_block_group->item);
3666                 } else {
3667                         calc = shrink_block_group->key.offset;
3668                 }
3669                 spin_unlock(&shrink_block_group->lock);
3670
3671                 do_chunk_alloc(trans, root->fs_info->extent_root,
3672                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
3673
3674                 mutex_unlock(&root->fs_info->alloc_mutex);
3675                 btrfs_end_transaction(trans, root);
3676                 mutex_lock(&root->fs_info->alloc_mutex);
3677         } else
3678                 spin_unlock(&shrink_block_group->lock);
3679         return 0;
3680 }
3681
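/*
 * empty a block group so it can be removed: mark it read only, flush
 * delalloc and ordered data, relocate every extent still inside it and
 * finally delete the block group item itself.  called without
 * alloc_mutex held; the mutex is taken and dropped internally.
 */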
3682 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
3683 {
3684         struct btrfs_trans_handle *trans;
3685         struct btrfs_root *tree_root = root->fs_info->tree_root;
3686         struct btrfs_path *path;
3687         u64 cur_byte;
3688         u64 total_found;
3689         u64 shrink_last_byte;
3690         struct btrfs_block_group_cache *shrink_block_group;
3691         struct btrfs_key key;
3692         struct btrfs_key found_key;
3693         struct extent_buffer *leaf;
3694         u32 nritems;
3695         int ret;
3696         int progress;
3697
3698         mutex_lock(&root->fs_info->alloc_mutex);
3699         shrink_block_group = btrfs_lookup_block_group(root->fs_info,
3700                                                       shrink_start);
3701         BUG_ON(!shrink_block_group);
3702
3703         shrink_last_byte = shrink_block_group->key.objectid +
3704                 shrink_block_group->key.offset;
3705
3706         shrink_block_group->space_info->total_bytes -=
3707                 shrink_block_group->key.offset;
3708         path = btrfs_alloc_path();
             if (!path) {
                     mutex_unlock(&root->fs_info->alloc_mutex);
                     return -ENOMEM;
             }
3709         root = root->fs_info->extent_root;
3710         path->reada = 2;
3711
3712         printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
3713                (unsigned long long)shrink_start,
3714                (unsigned long long)shrink_block_group->flags);
3715
3716         __alloc_chunk_for_shrink(root, shrink_block_group, 1);
3717
3718 again:
3719
3720         shrink_block_group->ro = 1;
3721
3722         total_found = 0;
3723         progress = 0;
3724         key.objectid = shrink_start;
3725         key.offset = 0;
3726         key.type = 0;
3727         cur_byte = key.objectid;
3728
3729         mutex_unlock(&root->fs_info->alloc_mutex);
3730
3731         btrfs_start_delalloc_inodes(root);
3732         btrfs_wait_ordered_extents(tree_root, 0);
3733
3734         mutex_lock(&root->fs_info->alloc_mutex);
3735
3736         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3737         if (ret < 0)
3738                 goto out;
3739
3740         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
3741         if (ret < 0)
3742                 goto out;
3743
3744         if (ret == 0) {
3745                 leaf = path->nodes[0];
3746                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3747                 if (found_key.objectid + found_key.offset > shrink_start &&
3748                     found_key.objectid < shrink_last_byte) {
3749                         cur_byte = found_key.objectid;
3750                         key.objectid = cur_byte;
3751                 }
3752         }
3753         btrfs_release_path(root, path);
3754
3755         while(1) {
3756                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3757                 if (ret < 0)
3758                         goto out;
3759
3760 next:
3761                 leaf = path->nodes[0];
3762                 nritems = btrfs_header_nritems(leaf);
3763                 if (path->slots[0] >= nritems) {
3764                         ret = btrfs_next_leaf(root, path);
3765                         if (ret < 0)
3766                                 goto out;
3767                         if (ret == 1) {
3768                                 ret = 0;
3769                                 break;
3770                         }
3771                         leaf = path->nodes[0];
3772                         nritems = btrfs_header_nritems(leaf);
3773                 }
3774
3775                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3776
3777                 if (found_key.objectid >= shrink_last_byte)
3778                         break;
3779
3780                 if (progress && need_resched()) {
3781                         memcpy(&key, &found_key, sizeof(key));
3782                         cond_resched();
3783                         btrfs_release_path(root, path);
3784                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
3785                         progress = 0;
3786                         goto next;
3787                 }
3788                 progress = 1;
3789
3790                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
3791                     found_key.objectid + found_key.offset <= cur_byte) {
3792                         memcpy(&key, &found_key, sizeof(key));
3793                         key.offset++;
3794                         path->slots[0]++;
3795                         goto next;
3796                 }
3797
3798                 total_found++;
3799                 cur_byte = found_key.objectid + found_key.offset;
3800                 key.objectid = cur_byte;
3801                 btrfs_release_path(root, path);
3802                 ret = relocate_one_extent(root, path, &found_key);
3803                 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
3804         }
3805
3806         btrfs_release_path(root, path);
3807
3808         if (total_found > 0) {
3809                 printk(KERN_INFO "btrfs relocate found %llu last extent was %llu\n",
3810                        (unsigned long long)total_found,
3811                        (unsigned long long)found_key.objectid);
3812                 mutex_unlock(&root->fs_info->alloc_mutex);
3813                 trans = btrfs_start_transaction(tree_root, 1);
3814                 btrfs_commit_transaction(trans, tree_root);
3815
3816                 btrfs_clean_old_snapshots(tree_root);
3817
3818                 btrfs_start_delalloc_inodes(root);
3819                 btrfs_wait_ordered_extents(tree_root, 0);
3820
3821                 trans = btrfs_start_transaction(tree_root, 1);
3822                 btrfs_commit_transaction(trans, tree_root);
3823                 mutex_lock(&root->fs_info->alloc_mutex);
3824                 goto again;
3825         }
3826
3827         /*
3828          * we've freed all the extents, now remove the block
3829          * group item from the tree
3830          */
3831         mutex_unlock(&root->fs_info->alloc_mutex);
3832
3833         trans = btrfs_start_transaction(root, 1);
3834
3835         mutex_lock(&root->fs_info->alloc_mutex);
3836         memcpy(&key, &shrink_block_group->key, sizeof(key));
3837
3838         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3839         if (ret > 0)
3840                 ret = -EIO;
3841         if (ret < 0) {
3842                 btrfs_end_transaction(trans, root);
3843                 goto out;
3844         }
3845
3846         spin_lock(&root->fs_info->block_group_cache_lock);
3847         rb_erase(&shrink_block_group->cache_node,
3848                  &root->fs_info->block_group_cache_tree);
3849         spin_unlock(&root->fs_info->block_group_cache_lock);
3850
3851         ret = btrfs_remove_free_space(shrink_block_group, key.objectid,
3852                                       key.offset);
3853         if (ret) {
3854                 btrfs_end_transaction(trans, root);
3855                 goto out;
3856         }
3857         /*
3858         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3859         kfree(shrink_block_group);
3860         */
3861
3862         btrfs_del_item(trans, root, path);
3863         btrfs_release_path(root, path);
3864         mutex_unlock(&root->fs_info->alloc_mutex);
3865         btrfs_commit_transaction(trans, root);
3866
3867         mutex_lock(&root->fs_info->alloc_mutex);
3868
3869         /* the code to unpin extents might set a few bits in the free
3870          * space cache for this range again
3871          */
3872         /* XXX? */
3873         ret = btrfs_remove_free_space(shrink_block_group, key.objectid,
3874                                       key.offset);
3875 out:
3876         btrfs_free_path(path);
3877         mutex_unlock(&root->fs_info->alloc_mutex);
3878         return ret;
3879 }
3880
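/*
 * search forward from *key and leave the path pointing at the first
 * block group item found at or after key->objectid.  returns 0 on a
 * hit, -ENOENT when the tree has no more block group items.
 */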
3881 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3882                            struct btrfs_key *key)
3883 {
3884         int ret = 0;
3885         struct btrfs_key found_key;
3886         struct extent_buffer *leaf;
3887         int slot;
3888
3889         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3890         if (ret < 0)
3891                 goto out;
3892
3893         while(1) {
3894                 slot = path->slots[0];
3895                 leaf = path->nodes[0];
3896                 if (slot >= btrfs_header_nritems(leaf)) {
3897                         ret = btrfs_next_leaf(root, path);
3898                         if (ret == 0)
3899                                 continue;
3900                         if (ret < 0)
3901                                 goto out;
3902                         break;
3903                 }
3904                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3905
3906                 if (found_key.objectid >= key->objectid &&
3907                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3908                         ret = 0;
3909                         goto out;
3910                 }
3911                 path->slots[0]++;
3912         }
3913         ret = -ENOENT;
3914 out:
3915         return ret;
3916 }
3917
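/*
 * mount time setup of the block group cache: read every block group
 * item from the extent tree, wire up the matching space_info
 * accounting and add each group to the fs-wide rb tree.
 */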
3918 int btrfs_read_block_groups(struct btrfs_root *root)
3919 {
3920         struct btrfs_path *path;
3921         int ret;
3922         struct btrfs_block_group_cache *cache;
3923         struct btrfs_fs_info *info = root->fs_info;
3924         struct btrfs_space_info *space_info;
3925         struct btrfs_key key;
3926         struct btrfs_key found_key;
3927         struct extent_buffer *leaf;
3928
3929         root = info->extent_root;
3930         key.objectid = 0;
3931         key.offset = 0;
3932         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3933         path = btrfs_alloc_path();
3934         if (!path)
3935                 return -ENOMEM;
3936
3937         mutex_lock(&root->fs_info->alloc_mutex);
3938         while(1) {
3939                 ret = find_first_block_group(root, path, &key);
3940                 if (ret > 0) {
3941                         ret = 0;
3942                         goto error;
3943                 }
3944                 if (ret != 0)
3945                         goto error;
3946
3947                 leaf = path->nodes[0];
3948                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3949                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3950                 if (!cache) {
3951                         ret = -ENOMEM;
3952                         break;
3953                 }
3954
3955                 spin_lock_init(&cache->lock);
3956                 INIT_LIST_HEAD(&cache->list);
3957                 read_extent_buffer(leaf, &cache->item,
3958                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3959                                    sizeof(cache->item));
3960                 memcpy(&cache->key, &found_key, sizeof(found_key));
3961
3962                 key.objectid = found_key.objectid + found_key.offset;
3963                 btrfs_release_path(root, path);
3964                 cache->flags = btrfs_block_group_flags(&cache->item);
3965
3966                 ret = update_space_info(info, cache->flags, found_key.offset,
3967                                         btrfs_block_group_used(&cache->item),
3968                                         &space_info);
3969                 BUG_ON(ret);
3970                 cache->space_info = space_info;
3971                 spin_lock(&space_info->lock);
3972                 list_add(&cache->list, &space_info->block_groups);
3973                 spin_unlock(&space_info->lock);
3974
3975                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
3976                 BUG_ON(ret);
3977
3978                 if (key.objectid >=
3979                     btrfs_super_total_bytes(&info->super_copy))
3980                         break;
3981         }
3982         ret = 0;
3983 error:
3984         btrfs_free_path(path);
3985         mutex_unlock(&root->fs_info->alloc_mutex);
3986         return ret;
3987 }
3988
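/*
 * create a brand new block group as part of chunk allocation.  the
 * caller must already hold alloc_mutex (hence the WARN_ON); the group
 * is added to the in-memory caches here and inserted as an item in
 * the extent tree in the same transaction.
 */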
3989 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3990                            struct btrfs_root *root, u64 bytes_used,
3991                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3992                            u64 size)
3993 {
3994         int ret;
3995         struct btrfs_root *extent_root;
3996         struct btrfs_block_group_cache *cache;
3997
3998         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
3999         extent_root = root->fs_info->extent_root;
4000
4001         root->fs_info->last_trans_new_blockgroup = trans->transid;
4002
4003         cache = kzalloc(sizeof(*cache), GFP_NOFS);
4004         if (!cache)
4005                 return -ENOMEM;
4006
4007         cache->key.objectid = chunk_offset;
4008         cache->key.offset = size;
4009         spin_lock_init(&cache->lock);
4010         INIT_LIST_HEAD(&cache->list);
4011         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
4012
4013         btrfs_set_block_group_used(&cache->item, bytes_used);
4014         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
4015         cache->flags = type;
4016         btrfs_set_block_group_flags(&cache->item, type);
4017
4018         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
4019                                 &cache->space_info);
4020         BUG_ON(ret);
4021         spin_lock(&cache->space_info->lock);
4022         list_add(&cache->list, &cache->space_info->block_groups);
4023         spin_unlock(&cache->space_info->lock);
4024
4025         ret = btrfs_add_block_group_cache(root->fs_info, cache);
4026         BUG_ON(ret);
4027
4028         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
4029                                 sizeof(cache->item));
4030         BUG_ON(ret);
4031
4032         finish_current_insert(trans, extent_root);
4033         ret = del_pending_extents(trans, extent_root);
4034         BUG_ON(ret);
4035         set_avail_alloc_bits(extent_root->fs_info, type);
4036
4037         return 0;
4038 }