Btrfs: Add some conditional schedules near the alloc_mutex
[safe/jmp/linux-2.6] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include "hash.h"
22 #include "crc32c.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28 #include "locking.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40 static struct btrfs_block_group_cache *
41 __btrfs_find_block_group(struct btrfs_root *root,
42                          struct btrfs_block_group_cache *hint,
43                          u64 search_start, int data, int owner);
44
45 void maybe_lock_mutex(struct btrfs_root *root)
46 {
47         if (root != root->fs_info->extent_root &&
48             root != root->fs_info->chunk_root &&
49             root != root->fs_info->dev_root) {
50                 mutex_lock(&root->fs_info->alloc_mutex);
51         }
52 }
53
54 void maybe_unlock_mutex(struct btrfs_root *root)
55 {
56         if (root != root->fs_info->extent_root &&
57             root != root->fs_info->chunk_root &&
58             root != root->fs_info->dev_root) {
59                 mutex_unlock(&root->fs_info->alloc_mutex);
60         }
61 }
62
63 static int cache_block_group(struct btrfs_root *root,
64                              struct btrfs_block_group_cache *block_group)
65 {
66         struct btrfs_path *path;
67         int ret;
68         struct btrfs_key key;
69         struct extent_buffer *leaf;
70         struct extent_io_tree *free_space_cache;
71         int slot;
72         u64 last = 0;
73         u64 hole_size;
74         u64 first_free;
75         int found = 0;
76
77         if (!block_group)
78                 return 0;
79
80         root = root->fs_info->extent_root;
81         free_space_cache = &root->fs_info->free_space_cache;
82
83         if (block_group->cached)
84                 return 0;
85
86         path = btrfs_alloc_path();
87         if (!path)
88                 return -ENOMEM;
89
90         path->reada = 2;
91         /*
92          * we get into deadlocks with paths held by callers of this function.
93          * since the alloc_mutex is protecting things right now, just
94          * skip the locking here
95          */
96         path->skip_locking = 1;
97         first_free = block_group->key.objectid;
98         key.objectid = block_group->key.objectid;
99         key.offset = 0;
100         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
101         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
102         if (ret < 0)
103                 return ret;
104         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
105         if (ret < 0)
106                 return ret;
107         if (ret == 0) {
108                 leaf = path->nodes[0];
109                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
110                 if (key.objectid + key.offset > first_free)
111                         first_free = key.objectid + key.offset;
112         }
113         while(1) {
114                 leaf = path->nodes[0];
115                 slot = path->slots[0];
116                 if (slot >= btrfs_header_nritems(leaf)) {
117                         ret = btrfs_next_leaf(root, path);
118                         if (ret < 0)
119                                 goto err;
120                         if (ret == 0) {
121                                 continue;
122                         } else {
123                                 break;
124                         }
125                 }
126                 btrfs_item_key_to_cpu(leaf, &key, slot);
127                 if (key.objectid < block_group->key.objectid) {
128                         goto next;
129                 }
130                 if (key.objectid >= block_group->key.objectid +
131                     block_group->key.offset) {
132                         break;
133                 }
134
135                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
136                         if (!found) {
137                                 last = first_free;
138                                 found = 1;
139                         }
140                         if (key.objectid > last) {
141                                 hole_size = key.objectid - last;
142                                 set_extent_dirty(free_space_cache, last,
143                                                  last + hole_size - 1,
144                                                  GFP_NOFS);
145                         }
146                         last = key.objectid + key.offset;
147                 }
148 next:
149                 path->slots[0]++;
150         }
151
152         if (!found)
153                 last = first_free;
154         if (block_group->key.objectid +
155             block_group->key.offset > last) {
156                 hole_size = block_group->key.objectid +
157                         block_group->key.offset - last;
158                 set_extent_dirty(free_space_cache, last,
159                                  last + hole_size - 1, GFP_NOFS);
160         }
161         block_group->cached = 1;
162 err:
163         btrfs_free_path(path);
164         return 0;
165 }
166
167 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
168                                                        btrfs_fs_info *info,
169                                                          u64 bytenr)
170 {
171         struct extent_io_tree *block_group_cache;
172         struct btrfs_block_group_cache *block_group = NULL;
173         u64 ptr;
174         u64 start;
175         u64 end;
176         int ret;
177
178         bytenr = max_t(u64, bytenr,
179                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
180         block_group_cache = &info->block_group_cache;
181         ret = find_first_extent_bit(block_group_cache,
182                                     bytenr, &start, &end,
183                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
184                                     BLOCK_GROUP_SYSTEM);
185         if (ret) {
186                 return NULL;
187         }
188         ret = get_state_private(block_group_cache, start, &ptr);
189         if (ret)
190                 return NULL;
191
192         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
193         return block_group;
194 }
195
196 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
197                                                          btrfs_fs_info *info,
198                                                          u64 bytenr)
199 {
200         struct extent_io_tree *block_group_cache;
201         struct btrfs_block_group_cache *block_group = NULL;
202         u64 ptr;
203         u64 start;
204         u64 end;
205         int ret;
206
207         bytenr = max_t(u64, bytenr,
208                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
209         block_group_cache = &info->block_group_cache;
210         ret = find_first_extent_bit(block_group_cache,
211                                     bytenr, &start, &end,
212                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
213                                     BLOCK_GROUP_SYSTEM);
214         if (ret) {
215                 return NULL;
216         }
217         ret = get_state_private(block_group_cache, start, &ptr);
218         if (ret)
219                 return NULL;
220
221         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
222         if (block_group->key.objectid <= bytenr && bytenr <
223             block_group->key.objectid + block_group->key.offset)
224                 return block_group;
225         return NULL;
226 }
227
228 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
229 {
230         return (cache->flags & bits) == bits;
231 }
232
233 static int noinline find_search_start(struct btrfs_root *root,
234                               struct btrfs_block_group_cache **cache_ret,
235                               u64 *start_ret, u64 num, int data)
236 {
237         int ret;
238         struct btrfs_block_group_cache *cache = *cache_ret;
239         struct extent_io_tree *free_space_cache;
240         struct extent_state *state;
241         u64 last;
242         u64 start = 0;
243         u64 cache_miss = 0;
244         u64 total_fs_bytes;
245         u64 search_start = *start_ret;
246         int wrapped = 0;
247
248         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
249         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
250         free_space_cache = &root->fs_info->free_space_cache;
251
252         if (!cache)
253                 goto out;
254
255 again:
256         ret = cache_block_group(root, cache);
257         if (ret) {
258                 goto out;
259         }
260
261         last = max(search_start, cache->key.objectid);
262         if (!block_group_bits(cache, data) || cache->ro)
263                 goto new_group;
264
265         spin_lock_irq(&free_space_cache->lock);
266         state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
267         while(1) {
268                 if (!state) {
269                         if (!cache_miss)
270                                 cache_miss = last;
271                         spin_unlock_irq(&free_space_cache->lock);
272                         goto new_group;
273                 }
274
275                 start = max(last, state->start);
276                 last = state->end + 1;
277                 if (last - start < num) {
278                         do {
279                                 state = extent_state_next(state);
280                         } while(state && !(state->state & EXTENT_DIRTY));
281                         continue;
282                 }
283                 spin_unlock_irq(&free_space_cache->lock);
284                 if (cache->ro) {
285                         goto new_group;
286                 }
287                 if (start + num > cache->key.objectid + cache->key.offset)
288                         goto new_group;
289                 if (!block_group_bits(cache, data)) {
290                         printk("block group bits don't match %Lu %d\n", cache->flags, data);
291                 }
292                 *start_ret = start;
293                 return 0;
294         }
295 out:
296         cache = btrfs_lookup_block_group(root->fs_info, search_start);
297         if (!cache) {
298                 printk("Unable to find block group for %Lu\n", search_start);
299                 WARN_ON(1);
300         }
301         return -ENOSPC;
302
303 new_group:
304         last = cache->key.objectid + cache->key.offset;
305 wrapped:
306         cache = btrfs_lookup_first_block_group(root->fs_info, last);
307         if (!cache || cache->key.objectid >= total_fs_bytes) {
308 no_cache:
309                 if (!wrapped) {
310                         wrapped = 1;
311                         last = search_start;
312                         goto wrapped;
313                 }
314                 goto out;
315         }
316         if (cache_miss && !cache->cached) {
317                 cache_block_group(root, cache);
318                 last = cache_miss;
319                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
320         }
321         cache_miss = 0;
322         cache = __btrfs_find_block_group(root, cache, last, data, 0);
323         if (!cache)
324                 goto no_cache;
325         *cache_ret = cache;
326         goto again;
327 }
328
329 static u64 div_factor(u64 num, int factor)
330 {
331         if (factor == 10)
332                 return num;
333         num *= factor;
334         do_div(num, 10);
335         return num;
336 }
337
338 static int block_group_state_bits(u64 flags)
339 {
340         int bits = 0;
341         if (flags & BTRFS_BLOCK_GROUP_DATA)
342                 bits |= BLOCK_GROUP_DATA;
343         if (flags & BTRFS_BLOCK_GROUP_METADATA)
344                 bits |= BLOCK_GROUP_METADATA;
345         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
346                 bits |= BLOCK_GROUP_SYSTEM;
347         return bits;
348 }
349
350 static struct btrfs_block_group_cache *
351 __btrfs_find_block_group(struct btrfs_root *root,
352                          struct btrfs_block_group_cache *hint,
353                          u64 search_start, int data, int owner)
354 {
355         struct btrfs_block_group_cache *cache;
356         struct extent_io_tree *block_group_cache;
357         struct btrfs_block_group_cache *found_group = NULL;
358         struct btrfs_fs_info *info = root->fs_info;
359         u64 used;
360         u64 last = 0;
361         u64 start;
362         u64 end;
363         u64 free_check;
364         u64 ptr;
365         int bit;
366         int ret;
367         int full_search = 0;
368         int factor = 10;
369         int wrapped = 0;
370
371         block_group_cache = &info->block_group_cache;
372
373         if (data & BTRFS_BLOCK_GROUP_METADATA)
374                 factor = 9;
375
376         bit = block_group_state_bits(data);
377
378         if (search_start) {
379                 struct btrfs_block_group_cache *shint;
380                 shint = btrfs_lookup_first_block_group(info, search_start);
381                 if (shint && block_group_bits(shint, data) && !shint->ro) {
382                         used = btrfs_block_group_used(&shint->item);
383                         if (used + shint->pinned <
384                             div_factor(shint->key.offset, factor)) {
385                                 return shint;
386                         }
387                 }
388         }
389         if (hint && !hint->ro && block_group_bits(hint, data)) {
390                 used = btrfs_block_group_used(&hint->item);
391                 if (used + hint->pinned <
392                     div_factor(hint->key.offset, factor)) {
393                         return hint;
394                 }
395                 last = hint->key.objectid + hint->key.offset;
396         } else {
397                 if (hint)
398                         last = max(hint->key.objectid, search_start);
399                 else
400                         last = search_start;
401         }
402 again:
403         while(1) {
404                 ret = find_first_extent_bit(block_group_cache, last,
405                                             &start, &end, bit);
406                 if (ret)
407                         break;
408
409                 ret = get_state_private(block_group_cache, start, &ptr);
410                 if (ret) {
411                         last = end + 1;
412                         continue;
413                 }
414
415                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
416                 last = cache->key.objectid + cache->key.offset;
417                 used = btrfs_block_group_used(&cache->item);
418
419                 if (!cache->ro && block_group_bits(cache, data)) {
420                         free_check = div_factor(cache->key.offset, factor);
421                         if (used + cache->pinned < free_check) {
422                                 found_group = cache;
423                                 goto found;
424                         }
425                 }
426                 cond_resched();
427         }
428         if (!wrapped) {
429                 last = search_start;
430                 wrapped = 1;
431                 goto again;
432         }
433         if (!full_search && factor < 10) {
434                 last = search_start;
435                 full_search = 1;
436                 factor = 10;
437                 goto again;
438         }
439 found:
440         return found_group;
441 }
442
443 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
444                                                  struct btrfs_block_group_cache
445                                                  *hint, u64 search_start,
446                                                  int data, int owner)
447 {
448
449         struct btrfs_block_group_cache *ret;
450         mutex_lock(&root->fs_info->alloc_mutex);
451         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
452         mutex_unlock(&root->fs_info->alloc_mutex);
453         return ret;
454 }
455 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
456                            u64 owner, u64 owner_offset)
457 {
458         u32 high_crc = ~(u32)0;
459         u32 low_crc = ~(u32)0;
460         __le64 lenum;
461         lenum = cpu_to_le64(root_objectid);
462         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
463         lenum = cpu_to_le64(ref_generation);
464         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
465         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
466                 lenum = cpu_to_le64(owner);
467                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
468                 lenum = cpu_to_le64(owner_offset);
469                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
470         }
471         return ((u64)high_crc << 32) | (u64)low_crc;
472 }
473
474 static int match_extent_ref(struct extent_buffer *leaf,
475                             struct btrfs_extent_ref *disk_ref,
476                             struct btrfs_extent_ref *cpu_ref)
477 {
478         int ret;
479         int len;
480
481         if (cpu_ref->objectid)
482                 len = sizeof(*cpu_ref);
483         else
484                 len = 2 * sizeof(u64);
485         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
486                                    len);
487         return ret == 0;
488 }
489
490 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
491                                           struct btrfs_root *root,
492                                           struct btrfs_path *path, u64 bytenr,
493                                           u64 root_objectid,
494                                           u64 ref_generation, u64 owner,
495                                           u64 owner_offset, int del)
496 {
497         u64 hash;
498         struct btrfs_key key;
499         struct btrfs_key found_key;
500         struct btrfs_extent_ref ref;
501         struct extent_buffer *leaf;
502         struct btrfs_extent_ref *disk_ref;
503         int ret;
504         int ret2;
505
506         btrfs_set_stack_ref_root(&ref, root_objectid);
507         btrfs_set_stack_ref_generation(&ref, ref_generation);
508         btrfs_set_stack_ref_objectid(&ref, owner);
509         btrfs_set_stack_ref_offset(&ref, owner_offset);
510
511         hash = hash_extent_ref(root_objectid, ref_generation, owner,
512                                owner_offset);
513         key.offset = hash;
514         key.objectid = bytenr;
515         key.type = BTRFS_EXTENT_REF_KEY;
516
517         while (1) {
518                 ret = btrfs_search_slot(trans, root, &key, path,
519                                         del ? -1 : 0, del);
520                 if (ret < 0)
521                         goto out;
522                 leaf = path->nodes[0];
523                 if (ret != 0) {
524                         u32 nritems = btrfs_header_nritems(leaf);
525                         if (path->slots[0] >= nritems) {
526                                 ret2 = btrfs_next_leaf(root, path);
527                                 if (ret2)
528                                         goto out;
529                                 leaf = path->nodes[0];
530                         }
531                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
532                         if (found_key.objectid != bytenr ||
533                             found_key.type != BTRFS_EXTENT_REF_KEY)
534                                 goto out;
535                         key.offset = found_key.offset;
536                         if (del) {
537                                 btrfs_release_path(root, path);
538                                 continue;
539                         }
540                 }
541                 disk_ref = btrfs_item_ptr(path->nodes[0],
542                                           path->slots[0],
543                                           struct btrfs_extent_ref);
544                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
545                         ret = 0;
546                         goto out;
547                 }
548                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
549                 key.offset = found_key.offset + 1;
550                 btrfs_release_path(root, path);
551         }
552 out:
553         return ret;
554 }
555
556 /*
557  * Back reference rules.  Back refs have three main goals:
558  *
559  * 1) differentiate between all holders of references to an extent so that
560  *    when a reference is dropped we can make sure it was a valid reference
561  *    before freeing the extent.
562  *
563  * 2) Provide enough information to quickly find the holders of an extent
564  *    if we notice a given block is corrupted or bad.
565  *
566  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
567  *    maintenance.  This is actually the same as #2, but with a slightly
568  *    different use case.
569  *
570  * File extents can be referenced by:
571  *
572  * - multiple snapshots, subvolumes, or different generations in one subvol
573  * - different files inside a single subvolume (in theory, not implemented yet)
574  * - different offsets inside a file (bookend extents in file.c)
575  *
576  * The extent ref structure has fields for:
577  *
578  * - Objectid of the subvolume root
579  * - Generation number of the tree holding the reference
580  * - objectid of the file holding the reference
581  * - offset in the file corresponding to the key holding the reference
582  *
583  * When a file extent is allocated the fields are filled in:
584  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
585  *
586  * When a leaf is cow'd new references are added for every file extent found
587  * in the leaf.  It looks the same as the create case, but trans->transid
588  * will be different when the block is cow'd.
589  *
590  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
591  *
592  * When a file extent is removed either during snapshot deletion or file
593  * truncation, the corresponding back reference is found
594  * by searching for:
595  *
596  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
597  *      inode objectid, offset in file)
598  *
599  * Btree extents can be referenced by:
600  *
601  * - Different subvolumes
602  * - Different generations of the same subvolume
603  *
604  * Storing sufficient information for a full reverse mapping of a btree
605  * block would require storing the lowest key of the block in the backref,
606  * and it would require updating that lowest key either before write out or
607  * every time it changed.  Instead, the objectid of the lowest key is stored
608  * along with the level of the tree block.  This provides a hint
609  * about where in the btree the block can be found.  Searches through the
610  * btree only need to look for a pointer to that block, so they stop one
611  * level higher than the level recorded in the backref.
612  *
613  * Some btrees do not do reference counting on their extents.  These
614  * include the extent tree and the tree of tree roots.  Backrefs for these
615  * trees always have a generation of zero.
616  *
617  * When a tree block is created, back references are inserted:
618  *
619  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
620  *
621  * When a tree block is cow'd in a reference counted root,
622  * new back references are added for all the blocks it points to.
623  * These are of the form (trans->transid will have increased since creation):
624  *
625  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
626  *
627  * Because the lowest_key_objectid and the level are just hints
628  * they are not used when backrefs are deleted.  When a backref is deleted:
629  *
630  * if backref was for a tree root:
631  *     root_objectid = root->root_key.objectid
632  * else
633  *     root_objectid = btrfs_header_owner(parent)
634  *
635  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
636  *
637  * Back Reference Key hashing:
638  *
639  * Back references have four fields, each 64 bits long.  Unfortunately,
640  * This is hashed into a single 64 bit number and placed into the key offset.
641  * The key objectid corresponds to the first byte in the extent, and the
642  * key type is set to BTRFS_EXTENT_REF_KEY
643  */
644 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
645                                  struct btrfs_root *root,
646                                  struct btrfs_path *path, u64 bytenr,
647                                  u64 root_objectid, u64 ref_generation,
648                                  u64 owner, u64 owner_offset)
649 {
650         u64 hash;
651         struct btrfs_key key;
652         struct btrfs_extent_ref ref;
653         struct btrfs_extent_ref *disk_ref;
654         int ret;
655
656         btrfs_set_stack_ref_root(&ref, root_objectid);
657         btrfs_set_stack_ref_generation(&ref, ref_generation);
658         btrfs_set_stack_ref_objectid(&ref, owner);
659         btrfs_set_stack_ref_offset(&ref, owner_offset);
660
661         hash = hash_extent_ref(root_objectid, ref_generation, owner,
662                                owner_offset);
663         key.offset = hash;
664         key.objectid = bytenr;
665         key.type = BTRFS_EXTENT_REF_KEY;
666
667         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
668         while (ret == -EEXIST) {
669                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
670                                           struct btrfs_extent_ref);
671                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
672                         goto out;
673                 key.offset++;
674                 btrfs_release_path(root, path);
675                 ret = btrfs_insert_empty_item(trans, root, path, &key,
676                                               sizeof(ref));
677         }
678         if (ret)
679                 goto out;
680         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
681                                   struct btrfs_extent_ref);
682         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
683                             sizeof(ref));
684         btrfs_mark_buffer_dirty(path->nodes[0]);
685 out:
686         btrfs_release_path(root, path);
687         return ret;
688 }
689
690 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
691                                 struct btrfs_root *root,
692                                 u64 bytenr, u64 num_bytes,
693                                 u64 root_objectid, u64 ref_generation,
694                                 u64 owner, u64 owner_offset)
695 {
696         struct btrfs_path *path;
697         int ret;
698         struct btrfs_key key;
699         struct extent_buffer *l;
700         struct btrfs_extent_item *item;
701         u32 refs;
702
703         WARN_ON(num_bytes < root->sectorsize);
704         path = btrfs_alloc_path();
705         if (!path)
706                 return -ENOMEM;
707
708         path->reada = 1;
709         key.objectid = bytenr;
710         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
711         key.offset = num_bytes;
712         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
713                                 0, 1);
714         if (ret < 0)
715                 return ret;
716         if (ret != 0) {
717                 BUG();
718         }
719         BUG_ON(ret != 0);
720         l = path->nodes[0];
721         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
722         refs = btrfs_extent_refs(l, item);
723         btrfs_set_extent_refs(l, item, refs + 1);
724         btrfs_mark_buffer_dirty(path->nodes[0]);
725
726         btrfs_release_path(root->fs_info->extent_root, path);
727
728         path->reada = 1;
729         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
730                                           path, bytenr, root_objectid,
731                                           ref_generation, owner, owner_offset);
732         BUG_ON(ret);
733         finish_current_insert(trans, root->fs_info->extent_root);
734         del_pending_extents(trans, root->fs_info->extent_root);
735
736         btrfs_free_path(path);
737         return 0;
738 }
739
740 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
741                                 struct btrfs_root *root,
742                                 u64 bytenr, u64 num_bytes,
743                                 u64 root_objectid, u64 ref_generation,
744                                 u64 owner, u64 owner_offset)
745 {
746         int ret;
747
748         mutex_lock(&root->fs_info->alloc_mutex);
749         ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
750                                      root_objectid, ref_generation,
751                                      owner, owner_offset);
752         mutex_unlock(&root->fs_info->alloc_mutex);
753         return ret;
754 }
755
756 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
757                          struct btrfs_root *root)
758 {
759         finish_current_insert(trans, root->fs_info->extent_root);
760         del_pending_extents(trans, root->fs_info->extent_root);
761         return 0;
762 }
763
764 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
765                              struct btrfs_root *root, u64 bytenr,
766                              u64 num_bytes, u32 *refs)
767 {
768         struct btrfs_path *path;
769         int ret;
770         struct btrfs_key key;
771         struct extent_buffer *l;
772         struct btrfs_extent_item *item;
773
774         WARN_ON(num_bytes < root->sectorsize);
775         path = btrfs_alloc_path();
776         path->reada = 1;
777         key.objectid = bytenr;
778         key.offset = num_bytes;
779         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
780         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
781                                 0, 0);
782         if (ret < 0)
783                 goto out;
784         if (ret != 0) {
785                 btrfs_print_leaf(root, path->nodes[0]);
786                 printk("failed to find block number %Lu\n", bytenr);
787                 BUG();
788         }
789         l = path->nodes[0];
790         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
791         *refs = btrfs_extent_refs(l, item);
792 out:
793         btrfs_free_path(path);
794         return 0;
795 }
796
797 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
798                                   struct btrfs_path *count_path,
799                                   u64 expected_owner,
800                                   u64 first_extent)
801 {
802         struct btrfs_root *extent_root = root->fs_info->extent_root;
803         struct btrfs_path *path;
804         u64 bytenr;
805         u64 found_objectid;
806         u64 found_owner;
807         u64 root_objectid = root->root_key.objectid;
808         u32 total_count = 0;
809         u32 extent_refs;
810         u32 cur_count;
811         u32 nritems;
812         int ret;
813         struct btrfs_key key;
814         struct btrfs_key found_key;
815         struct extent_buffer *l;
816         struct btrfs_extent_item *item;
817         struct btrfs_extent_ref *ref_item;
818         int level = -1;
819
820         /* FIXME, needs locking */
821         BUG();
822
823         mutex_lock(&root->fs_info->alloc_mutex);
824         path = btrfs_alloc_path();
825 again:
826         if (level == -1)
827                 bytenr = first_extent;
828         else
829                 bytenr = count_path->nodes[level]->start;
830
831         cur_count = 0;
832         key.objectid = bytenr;
833         key.offset = 0;
834
835         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
836         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
837         if (ret < 0)
838                 goto out;
839         BUG_ON(ret == 0);
840
841         l = path->nodes[0];
842         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
843
844         if (found_key.objectid != bytenr ||
845             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
846                 goto out;
847         }
848
849         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
850         extent_refs = btrfs_extent_refs(l, item);
851         while (1) {
852                 l = path->nodes[0];
853                 nritems = btrfs_header_nritems(l);
854                 if (path->slots[0] >= nritems) {
855                         ret = btrfs_next_leaf(extent_root, path);
856                         if (ret == 0)
857                                 continue;
858                         break;
859                 }
860                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
861                 if (found_key.objectid != bytenr)
862                         break;
863
864                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
865                         path->slots[0]++;
866                         continue;
867                 }
868
869                 cur_count++;
870                 ref_item = btrfs_item_ptr(l, path->slots[0],
871                                           struct btrfs_extent_ref);
872                 found_objectid = btrfs_ref_root(l, ref_item);
873
874                 if (found_objectid != root_objectid) {
875                         total_count = 2;
876                         goto out;
877                 }
878                 if (level == -1) {
879                         found_owner = btrfs_ref_objectid(l, ref_item);
880                         if (found_owner != expected_owner) {
881                                 total_count = 2;
882                                 goto out;
883                         }
884                         /*
885                          * nasty.  we don't count a reference held by
886                          * the running transaction.  This allows nodatacow
887                          * to avoid cow most of the time
888                          */
889                         if (found_owner >= BTRFS_FIRST_FREE_OBJECTID &&
890                             btrfs_ref_generation(l, ref_item) ==
891                             root->fs_info->generation) {
892                                 extent_refs--;
893                         }
894                 }
895                 total_count = 1;
896                 path->slots[0]++;
897         }
898         /*
899          * if there is more than one reference against a data extent,
900          * we have to assume the other ref is another snapshot
901          */
902         if (level == -1 && extent_refs > 1) {
903                 total_count = 2;
904                 goto out;
905         }
906         if (cur_count == 0) {
907                 total_count = 0;
908                 goto out;
909         }
910         if (level >= 0 && root->node == count_path->nodes[level])
911                 goto out;
912         level++;
913         btrfs_release_path(root, path);
914         goto again;
915
916 out:
917         btrfs_free_path(path);
918         mutex_unlock(&root->fs_info->alloc_mutex);
919         return total_count;
920 }
921
922 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
923                   struct extent_buffer *buf)
924 {
925         u64 bytenr;
926         u32 nritems;
927         struct btrfs_key key;
928         struct btrfs_file_extent_item *fi;
929         int i;
930         int level;
931         int ret;
932         int faili;
933
934         if (!root->ref_cows)
935                 return 0;
936
937         level = btrfs_header_level(buf);
938         nritems = btrfs_header_nritems(buf);
939         for (i = 0; i < nritems; i++) {
940                 cond_resched();
941                 if (level == 0) {
942                         u64 disk_bytenr;
943                         btrfs_item_key_to_cpu(buf, &key, i);
944                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
945                                 continue;
946                         fi = btrfs_item_ptr(buf, i,
947                                             struct btrfs_file_extent_item);
948                         if (btrfs_file_extent_type(buf, fi) ==
949                             BTRFS_FILE_EXTENT_INLINE)
950                                 continue;
951                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
952                         if (disk_bytenr == 0)
953                                 continue;
954
955                         mutex_lock(&root->fs_info->alloc_mutex);
956                         ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
957                                     btrfs_file_extent_disk_num_bytes(buf, fi),
958                                     root->root_key.objectid, trans->transid,
959                                     key.objectid, key.offset);
960                         mutex_unlock(&root->fs_info->alloc_mutex);
961                         if (ret) {
962                                 faili = i;
963                                 WARN_ON(1);
964                                 goto fail;
965                         }
966                 } else {
967                         bytenr = btrfs_node_blockptr(buf, i);
968                         btrfs_node_key_to_cpu(buf, &key, i);
969
970                         mutex_lock(&root->fs_info->alloc_mutex);
971                         ret = __btrfs_inc_extent_ref(trans, root, bytenr,
972                                            btrfs_level_size(root, level - 1),
973                                            root->root_key.objectid,
974                                            trans->transid,
975                                            level - 1, key.objectid);
976                         mutex_unlock(&root->fs_info->alloc_mutex);
977                         if (ret) {
978                                 faili = i;
979                                 WARN_ON(1);
980                                 goto fail;
981                         }
982                 }
983         }
984         return 0;
985 fail:
986         WARN_ON(1);
987 #if 0
988         for (i =0; i < faili; i++) {
989                 if (level == 0) {
990                         u64 disk_bytenr;
991                         btrfs_item_key_to_cpu(buf, &key, i);
992                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
993                                 continue;
994                         fi = btrfs_item_ptr(buf, i,
995                                             struct btrfs_file_extent_item);
996                         if (btrfs_file_extent_type(buf, fi) ==
997                             BTRFS_FILE_EXTENT_INLINE)
998                                 continue;
999                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1000                         if (disk_bytenr == 0)
1001                                 continue;
1002                         err = btrfs_free_extent(trans, root, disk_bytenr,
1003                                     btrfs_file_extent_disk_num_bytes(buf,
1004                                                                       fi), 0);
1005                         BUG_ON(err);
1006                 } else {
1007                         bytenr = btrfs_node_blockptr(buf, i);
1008                         err = btrfs_free_extent(trans, root, bytenr,
1009                                         btrfs_level_size(root, level - 1), 0);
1010                         BUG_ON(err);
1011                 }
1012         }
1013 #endif
1014         return ret;
1015 }
1016
1017 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1018                                  struct btrfs_root *root,
1019                                  struct btrfs_path *path,
1020                                  struct btrfs_block_group_cache *cache)
1021 {
1022         int ret;
1023         int pending_ret;
1024         struct btrfs_root *extent_root = root->fs_info->extent_root;
1025         unsigned long bi;
1026         struct extent_buffer *leaf;
1027
1028         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1029         if (ret < 0)
1030                 goto fail;
1031         BUG_ON(ret);
1032
1033         leaf = path->nodes[0];
1034         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1035         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1036         btrfs_mark_buffer_dirty(leaf);
1037         btrfs_release_path(extent_root, path);
1038 fail:
1039         finish_current_insert(trans, extent_root);
1040         pending_ret = del_pending_extents(trans, extent_root);
1041         if (ret)
1042                 return ret;
1043         if (pending_ret)
1044                 return pending_ret;
1045         return 0;
1046
1047 }
1048
1049 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1050                                    struct btrfs_root *root)
1051 {
1052         struct extent_io_tree *block_group_cache;
1053         struct btrfs_block_group_cache *cache;
1054         int ret;
1055         int err = 0;
1056         int werr = 0;
1057         struct btrfs_path *path;
1058         u64 last = 0;
1059         u64 start;
1060         u64 end;
1061         u64 ptr;
1062
1063         block_group_cache = &root->fs_info->block_group_cache;
1064         path = btrfs_alloc_path();
1065         if (!path)
1066                 return -ENOMEM;
1067
1068         mutex_lock(&root->fs_info->alloc_mutex);
1069         while(1) {
1070                 ret = find_first_extent_bit(block_group_cache, last,
1071                                             &start, &end, BLOCK_GROUP_DIRTY);
1072                 if (ret)
1073                         break;
1074
1075                 last = end + 1;
1076                 ret = get_state_private(block_group_cache, start, &ptr);
1077                 if (ret)
1078                         break;
1079                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1080                 err = write_one_cache_group(trans, root,
1081                                             path, cache);
1082                 /*
1083                  * if we fail to write the cache group, we want
1084                  * to keep it marked dirty in hopes that a later
1085                  * write will work
1086                  */
1087                 if (err) {
1088                         werr = err;
1089                         continue;
1090                 }
1091                 clear_extent_bits(block_group_cache, start, end,
1092                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
1093         }
1094         btrfs_free_path(path);
1095         mutex_unlock(&root->fs_info->alloc_mutex);
1096         return werr;
1097 }
1098
1099 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1100                                                   u64 flags)
1101 {
1102         struct list_head *head = &info->space_info;
1103         struct list_head *cur;
1104         struct btrfs_space_info *found;
1105         list_for_each(cur, head) {
1106                 found = list_entry(cur, struct btrfs_space_info, list);
1107                 if (found->flags == flags)
1108                         return found;
1109         }
1110         return NULL;
1111
1112 }
1113
1114 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1115                              u64 total_bytes, u64 bytes_used,
1116                              struct btrfs_space_info **space_info)
1117 {
1118         struct btrfs_space_info *found;
1119
1120         found = __find_space_info(info, flags);
1121         if (found) {
1122                 found->total_bytes += total_bytes;
1123                 found->bytes_used += bytes_used;
1124                 found->full = 0;
1125                 WARN_ON(found->total_bytes < found->bytes_used);
1126                 *space_info = found;
1127                 return 0;
1128         }
1129         found = kmalloc(sizeof(*found), GFP_NOFS);
1130         if (!found)
1131                 return -ENOMEM;
1132
1133         list_add(&found->list, &info->space_info);
1134         found->flags = flags;
1135         found->total_bytes = total_bytes;
1136         found->bytes_used = bytes_used;
1137         found->bytes_pinned = 0;
1138         found->full = 0;
1139         found->force_alloc = 0;
1140         *space_info = found;
1141         return 0;
1142 }
1143
1144 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1145 {
1146         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1147                                    BTRFS_BLOCK_GROUP_RAID1 |
1148                                    BTRFS_BLOCK_GROUP_RAID10 |
1149                                    BTRFS_BLOCK_GROUP_DUP);
1150         if (extra_flags) {
1151                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1152                         fs_info->avail_data_alloc_bits |= extra_flags;
1153                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1154                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1155                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1156                         fs_info->avail_system_alloc_bits |= extra_flags;
1157         }
1158 }
1159
1160 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1161 {
1162         u64 num_devices = root->fs_info->fs_devices->num_devices;
1163
1164         if (num_devices == 1)
1165                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1166         if (num_devices < 4)
1167                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1168
1169         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1170             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1171                       BTRFS_BLOCK_GROUP_RAID10))) {
1172                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1173         }
1174
1175         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1176             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1177                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1178         }
1179
1180         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1181             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1182              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1183              (flags & BTRFS_BLOCK_GROUP_DUP)))
1184                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1185         return flags;
1186 }
1187
1188 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1189                           struct btrfs_root *extent_root, u64 alloc_bytes,
1190                           u64 flags, int force)
1191 {
1192         struct btrfs_space_info *space_info;
1193         u64 thresh;
1194         u64 start;
1195         u64 num_bytes;
1196         int ret;
1197
1198         flags = reduce_alloc_profile(extent_root, flags);
1199
1200         space_info = __find_space_info(extent_root->fs_info, flags);
1201         if (!space_info) {
1202                 ret = update_space_info(extent_root->fs_info, flags,
1203                                         0, 0, &space_info);
1204                 BUG_ON(ret);
1205         }
1206         BUG_ON(!space_info);
1207
1208         if (space_info->force_alloc) {
1209                 force = 1;
1210                 space_info->force_alloc = 0;
1211         }
1212         if (space_info->full)
1213                 goto out;
1214
1215         thresh = div_factor(space_info->total_bytes, 6);
1216         if (!force &&
1217            (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1218             thresh)
1219                 goto out;
1220
1221         mutex_lock(&extent_root->fs_info->chunk_mutex);
1222         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1223         if (ret == -ENOSPC) {
1224 printk("space info full %Lu\n", flags);
1225                 space_info->full = 1;
1226                 goto out_unlock;
1227         }
1228         BUG_ON(ret);
1229
1230         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1231                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1232         BUG_ON(ret);
1233 out_unlock:
1234         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1235 out:
1236         return 0;
1237 }
1238
1239 static int update_block_group(struct btrfs_trans_handle *trans,
1240                               struct btrfs_root *root,
1241                               u64 bytenr, u64 num_bytes, int alloc,
1242                               int mark_free)
1243 {
1244         struct btrfs_block_group_cache *cache;
1245         struct btrfs_fs_info *info = root->fs_info;
1246         u64 total = num_bytes;
1247         u64 old_val;
1248         u64 byte_in_group;
1249         u64 start;
1250         u64 end;
1251
1252         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1253         while(total) {
1254                 cache = btrfs_lookup_block_group(info, bytenr);
1255                 if (!cache) {
1256                         return -1;
1257                 }
1258                 byte_in_group = bytenr - cache->key.objectid;
1259                 WARN_ON(byte_in_group > cache->key.offset);
1260                 start = cache->key.objectid;
1261                 end = start + cache->key.offset - 1;
1262                 set_extent_bits(&info->block_group_cache, start, end,
1263                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1264
1265                 old_val = btrfs_block_group_used(&cache->item);
1266                 num_bytes = min(total, cache->key.offset - byte_in_group);
1267                 if (alloc) {
1268                         old_val += num_bytes;
1269                         cache->space_info->bytes_used += num_bytes;
1270                 } else {
1271                         old_val -= num_bytes;
1272                         cache->space_info->bytes_used -= num_bytes;
1273                         if (mark_free) {
1274                                 set_extent_dirty(&info->free_space_cache,
1275                                                  bytenr, bytenr + num_bytes - 1,
1276                                                  GFP_NOFS);
1277                         }
1278                 }
1279                 btrfs_set_block_group_used(&cache->item, old_val);
1280                 total -= num_bytes;
1281                 bytenr += num_bytes;
1282         }
1283         return 0;
1284 }
1285
1286 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1287 {
1288         u64 start;
1289         u64 end;
1290         int ret;
1291         ret = find_first_extent_bit(&root->fs_info->block_group_cache,
1292                                     search_start, &start, &end,
1293                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
1294                                     BLOCK_GROUP_SYSTEM);
1295         if (ret)
1296                 return 0;
1297         return start;
1298 }
1299
1300
1301 static int update_pinned_extents(struct btrfs_root *root,
1302                                 u64 bytenr, u64 num, int pin)
1303 {
1304         u64 len;
1305         struct btrfs_block_group_cache *cache;
1306         struct btrfs_fs_info *fs_info = root->fs_info;
1307
1308         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1309         if (pin) {
1310                 set_extent_dirty(&fs_info->pinned_extents,
1311                                 bytenr, bytenr + num - 1, GFP_NOFS);
1312         } else {
1313                 clear_extent_dirty(&fs_info->pinned_extents,
1314                                 bytenr, bytenr + num - 1, GFP_NOFS);
1315         }
1316         while (num > 0) {
1317                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1318                 if (!cache) {
1319                         u64 first = first_logical_byte(root, bytenr);
1320                         WARN_ON(first < bytenr);
1321                         len = min(first - bytenr, num);
1322                 } else {
1323                         len = min(num, cache->key.offset -
1324                                   (bytenr - cache->key.objectid));
1325                 }
1326                 if (pin) {
1327                         if (cache) {
1328                                 cache->pinned += len;
1329                                 cache->space_info->bytes_pinned += len;
1330                         }
1331                         fs_info->total_pinned += len;
1332                 } else {
1333                         if (cache) {
1334                                 cache->pinned -= len;
1335                                 cache->space_info->bytes_pinned -= len;
1336                         }
1337                         fs_info->total_pinned -= len;
1338                 }
1339                 bytenr += len;
1340                 num -= len;
1341         }
1342         return 0;
1343 }
1344
1345 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1346 {
1347         u64 last = 0;
1348         u64 start;
1349         u64 end;
1350         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1351         int ret;
1352
1353         while(1) {
1354                 ret = find_first_extent_bit(pinned_extents, last,
1355                                             &start, &end, EXTENT_DIRTY);
1356                 if (ret)
1357                         break;
1358                 set_extent_dirty(copy, start, end, GFP_NOFS);
1359                 last = end + 1;
1360         }
1361         return 0;
1362 }
1363
1364 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1365                                struct btrfs_root *root,
1366                                struct extent_io_tree *unpin)
1367 {
1368         u64 start;
1369         u64 end;
1370         int ret;
1371         struct extent_io_tree *free_space_cache;
1372         free_space_cache = &root->fs_info->free_space_cache;
1373
1374         mutex_lock(&root->fs_info->alloc_mutex);
1375         while(1) {
1376                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1377                                             EXTENT_DIRTY);
1378                 if (ret)
1379                         break;
1380                 update_pinned_extents(root, start, end + 1 - start, 0);
1381                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1382                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1383         }
1384         mutex_unlock(&root->fs_info->alloc_mutex);
1385         return 0;
1386 }
1387
1388 static int finish_current_insert(struct btrfs_trans_handle *trans,
1389                                  struct btrfs_root *extent_root)
1390 {
1391         u64 start;
1392         u64 end;
1393         struct btrfs_fs_info *info = extent_root->fs_info;
1394         struct extent_buffer *eb;
1395         struct btrfs_path *path;
1396         struct btrfs_key ins;
1397         struct btrfs_disk_key first;
1398         struct btrfs_extent_item extent_item;
1399         int ret;
1400         int level;
1401         int err = 0;
1402
1403         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1404         btrfs_set_stack_extent_refs(&extent_item, 1);
1405         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1406         path = btrfs_alloc_path();
1407
1408         while(1) {
1409                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1410                                             &end, EXTENT_LOCKED);
1411                 if (ret)
1412                         break;
1413
1414                 ins.objectid = start;
1415                 ins.offset = end + 1 - start;
1416                 err = btrfs_insert_item(trans, extent_root, &ins,
1417                                         &extent_item, sizeof(extent_item));
1418                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1419                                   GFP_NOFS);
1420                 eb = read_tree_block(extent_root, ins.objectid, ins.offset,
1421                                      trans->transid);
1422                 btrfs_tree_lock(eb);
1423                 level = btrfs_header_level(eb);
1424                 if (level == 0) {
1425                         btrfs_item_key(eb, &first, 0);
1426                 } else {
1427                         btrfs_node_key(eb, &first, 0);
1428                 }
1429                 btrfs_tree_unlock(eb);
1430                 free_extent_buffer(eb);
1431                 /*
1432                  * the first key is just a hint, so the race we've created
1433                  * against reading it is fine
1434                  */
1435                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1436                                           start, extent_root->root_key.objectid,
1437                                           0, level,
1438                                           btrfs_disk_key_objectid(&first));
1439                 BUG_ON(err);
1440         }
1441         btrfs_free_path(path);
1442         return 0;
1443 }
1444
1445 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1446                           int pending)
1447 {
1448         int err = 0;
1449
1450         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1451         if (!pending) {
1452                 struct extent_buffer *buf;
1453                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1454                 if (buf) {
1455                         if (btrfs_try_tree_lock(buf) &&
1456                             btrfs_buffer_uptodate(buf, 0)) {
1457                                 u64 transid =
1458                                     root->fs_info->running_transaction->transid;
1459                                 u64 header_transid =
1460                                         btrfs_header_generation(buf);
1461                                 if (header_transid == transid &&
1462                                     !btrfs_header_flag(buf,
1463                                                BTRFS_HEADER_FLAG_WRITTEN)) {
1464                                         clean_tree_block(NULL, root, buf);
1465                                         btrfs_tree_unlock(buf);
1466                                         free_extent_buffer(buf);
1467                                         return 1;
1468                                 }
1469                                 btrfs_tree_unlock(buf);
1470                         }
1471                         free_extent_buffer(buf);
1472                 }
1473                 update_pinned_extents(root, bytenr, num_bytes, 1);
1474         } else {
1475                 set_extent_bits(&root->fs_info->pending_del,
1476                                 bytenr, bytenr + num_bytes - 1,
1477                                 EXTENT_LOCKED, GFP_NOFS);
1478         }
1479         BUG_ON(err < 0);
1480         return 0;
1481 }
1482
1483 /*
1484  * remove an extent from the root, returns 0 on success
1485  */
1486 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1487                          *root, u64 bytenr, u64 num_bytes,
1488                          u64 root_objectid, u64 ref_generation,
1489                          u64 owner_objectid, u64 owner_offset, int pin,
1490                          int mark_free)
1491 {
1492         struct btrfs_path *path;
1493         struct btrfs_key key;
1494         struct btrfs_fs_info *info = root->fs_info;
1495         struct btrfs_root *extent_root = info->extent_root;
1496         struct extent_buffer *leaf;
1497         int ret;
1498         int extent_slot = 0;
1499         int found_extent = 0;
1500         int num_to_del = 1;
1501         struct btrfs_extent_item *ei;
1502         u32 refs;
1503
1504         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1505         key.objectid = bytenr;
1506         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1507         key.offset = num_bytes;
1508         path = btrfs_alloc_path();
1509         if (!path)
1510                 return -ENOMEM;
1511
1512         path->reada = 1;
1513         ret = lookup_extent_backref(trans, extent_root, path,
1514                                     bytenr, root_objectid,
1515                                     ref_generation,
1516                                     owner_objectid, owner_offset, 1);
1517         if (ret == 0) {
1518                 struct btrfs_key found_key;
1519                 extent_slot = path->slots[0];
1520                 while(extent_slot > 0) {
1521                         extent_slot--;
1522                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1523                                               extent_slot);
1524                         if (found_key.objectid != bytenr)
1525                                 break;
1526                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1527                             found_key.offset == num_bytes) {
1528                                 found_extent = 1;
1529                                 break;
1530                         }
1531                         if (path->slots[0] - extent_slot > 5)
1532                                 break;
1533                 }
1534                 if (!found_extent)
1535                         ret = btrfs_del_item(trans, extent_root, path);
1536         } else {
1537                 btrfs_print_leaf(extent_root, path->nodes[0]);
1538                 WARN_ON(1);
1539                 printk("Unable to find ref byte nr %Lu root %Lu "
1540                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1541                        root_objectid, ref_generation, owner_objectid,
1542                        owner_offset);
1543         }
1544         if (!found_extent) {
1545                 btrfs_release_path(extent_root, path);
1546                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1547                 if (ret < 0)
1548                         return ret;
1549                 BUG_ON(ret);
1550                 extent_slot = path->slots[0];
1551         }
1552
1553         leaf = path->nodes[0];
1554         ei = btrfs_item_ptr(leaf, extent_slot,
1555                             struct btrfs_extent_item);
1556         refs = btrfs_extent_refs(leaf, ei);
1557         BUG_ON(refs == 0);
1558         refs -= 1;
1559         btrfs_set_extent_refs(leaf, ei, refs);
1560
1561         btrfs_mark_buffer_dirty(leaf);
1562
1563         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1564                 /* if the back ref and the extent are next to each other
1565                  * they get deleted below in one shot
1566                  */
1567                 path->slots[0] = extent_slot;
1568                 num_to_del = 2;
1569         } else if (found_extent) {
1570                 /* otherwise delete the extent back ref */
1571                 ret = btrfs_del_item(trans, extent_root, path);
1572                 BUG_ON(ret);
1573                 /* if refs are 0, we need to setup the path for deletion */
1574                 if (refs == 0) {
1575                         btrfs_release_path(extent_root, path);
1576                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1577                                                 -1, 1);
1578                         if (ret < 0)
1579                                 return ret;
1580                         BUG_ON(ret);
1581                 }
1582         }
1583
1584         if (refs == 0) {
1585                 u64 super_used;
1586                 u64 root_used;
1587
1588                 if (pin) {
1589                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1590                         if (ret > 0)
1591                                 mark_free = 1;
1592                         BUG_ON(ret < 0);
1593                 }
1594
1595                 /* block accounting for super block */
1596                 spin_lock_irq(&info->delalloc_lock);
1597                 super_used = btrfs_super_bytes_used(&info->super_copy);
1598                 btrfs_set_super_bytes_used(&info->super_copy,
1599                                            super_used - num_bytes);
1600                 spin_unlock_irq(&info->delalloc_lock);
1601
1602                 /* block accounting for root item */
1603                 root_used = btrfs_root_used(&root->root_item);
1604                 btrfs_set_root_used(&root->root_item,
1605                                            root_used - num_bytes);
1606                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1607                                       num_to_del);
1608                 if (ret) {
1609                         return ret;
1610                 }
1611                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1612                                          mark_free);
1613                 BUG_ON(ret);
1614         }
1615         btrfs_free_path(path);
1616         finish_current_insert(trans, extent_root);
1617         return ret;
1618 }
1619
1620 /*
1621  * find all the blocks marked as pending in the radix tree and remove
1622  * them from the extent map
1623  */
1624 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1625                                btrfs_root *extent_root)
1626 {
1627         int ret;
1628         int err = 0;
1629         u64 start;
1630         u64 end;
1631         struct extent_io_tree *pending_del;
1632         struct extent_io_tree *pinned_extents;
1633
1634         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1635         pending_del = &extent_root->fs_info->pending_del;
1636         pinned_extents = &extent_root->fs_info->pinned_extents;
1637
1638         while(1) {
1639                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1640                                             EXTENT_LOCKED);
1641                 if (ret)
1642                         break;
1643                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1644                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1645                                   GFP_NOFS);
1646                 ret = __free_extent(trans, extent_root,
1647                                      start, end + 1 - start,
1648                                      extent_root->root_key.objectid,
1649                                      0, 0, 0, 0, 0);
1650                 if (ret)
1651                         err = ret;
1652         }
1653         return err;
1654 }
1655
1656 /*
1657  * remove an extent from the root, returns 0 on success
1658  */
1659 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
1660                                struct btrfs_root *root, u64 bytenr,
1661                                u64 num_bytes, u64 root_objectid,
1662                                u64 ref_generation, u64 owner_objectid,
1663                                u64 owner_offset, int pin)
1664 {
1665         struct btrfs_root *extent_root = root->fs_info->extent_root;
1666         int pending_ret;
1667         int ret;
1668
1669         WARN_ON(num_bytes < root->sectorsize);
1670         if (!root->ref_cows)
1671                 ref_generation = 0;
1672
1673         if (root == extent_root) {
1674                 pin_down_bytes(root, bytenr, num_bytes, 1);
1675                 return 0;
1676         }
1677         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1678                             ref_generation, owner_objectid, owner_offset,
1679                             pin, pin == 0);
1680
1681         finish_current_insert(trans, root->fs_info->extent_root);
1682         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1683         return ret ? ret : pending_ret;
1684 }
1685
1686 int btrfs_free_extent(struct btrfs_trans_handle *trans,
1687                       struct btrfs_root *root, u64 bytenr,
1688                       u64 num_bytes, u64 root_objectid,
1689                       u64 ref_generation, u64 owner_objectid,
1690                       u64 owner_offset, int pin)
1691 {
1692         int ret;
1693
1694         maybe_lock_mutex(root);
1695         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes,
1696                                   root_objectid, ref_generation,
1697                                   owner_objectid, owner_offset, pin);
1698         maybe_unlock_mutex(root);
1699         return ret;
1700 }
1701
1702 static u64 stripe_align(struct btrfs_root *root, u64 val)
1703 {
1704         u64 mask = ((u64)root->stripesize - 1);
1705         u64 ret = (val + mask) & ~mask;
1706         return ret;
1707 }
1708
1709 /*
1710  * walks the btree of allocated extents and find a hole of a given size.
1711  * The key ins is changed to record the hole:
1712  * ins->objectid == block start
1713  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1714  * ins->offset == number of blocks
1715  * Any available blocks before search_start are skipped.
1716  */
1717 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1718                                      struct btrfs_root *orig_root,
1719                                      u64 num_bytes, u64 empty_size,
1720                                      u64 search_start, u64 search_end,
1721                                      u64 hint_byte, struct btrfs_key *ins,
1722                                      u64 exclude_start, u64 exclude_nr,
1723                                      int data)
1724 {
1725         int ret;
1726         u64 orig_search_start;
1727         struct btrfs_root * root = orig_root->fs_info->extent_root;
1728         struct btrfs_fs_info *info = root->fs_info;
1729         u64 total_needed = num_bytes;
1730         u64 *last_ptr = NULL;
1731         struct btrfs_block_group_cache *block_group;
1732         int full_scan = 0;
1733         int wrapped = 0;
1734         int chunk_alloc_done = 0;
1735         int empty_cluster = 2 * 1024 * 1024;
1736         int allowed_chunk_alloc = 0;
1737
1738         WARN_ON(num_bytes < root->sectorsize);
1739         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1740
1741         if (orig_root->ref_cows || empty_size)
1742                 allowed_chunk_alloc = 1;
1743
1744         if (data & BTRFS_BLOCK_GROUP_METADATA) {
1745                 last_ptr = &root->fs_info->last_alloc;
1746                 empty_cluster = 256 * 1024;
1747         }
1748
1749         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1750                 last_ptr = &root->fs_info->last_data_alloc;
1751         }
1752
1753         if (last_ptr) {
1754                 if (*last_ptr)
1755                         hint_byte = *last_ptr;
1756                 else {
1757                         empty_size += empty_cluster;
1758                 }
1759         }
1760
1761         search_start = max(search_start, first_logical_byte(root, 0));
1762         orig_search_start = search_start;
1763
1764         if (search_end == (u64)-1)
1765                 search_end = btrfs_super_total_bytes(&info->super_copy);
1766
1767         if (hint_byte) {
1768                 block_group = btrfs_lookup_first_block_group(info, hint_byte);
1769                 if (!block_group)
1770                         hint_byte = search_start;
1771                 block_group = __btrfs_find_block_group(root, block_group,
1772                                                      hint_byte, data, 1);
1773                 if (last_ptr && *last_ptr == 0 && block_group)
1774                         hint_byte = block_group->key.objectid;
1775         } else {
1776                 block_group = __btrfs_find_block_group(root,
1777                                                      trans->block_group,
1778                                                      search_start, data, 1);
1779         }
1780         search_start = max(search_start, hint_byte);
1781
1782         total_needed += empty_size;
1783
1784 check_failed:
1785         if (!block_group) {
1786                 block_group = btrfs_lookup_first_block_group(info,
1787                                                              search_start);
1788                 if (!block_group)
1789                         block_group = btrfs_lookup_first_block_group(info,
1790                                                        orig_search_start);
1791         }
1792         if (full_scan && !chunk_alloc_done) {
1793                 if (allowed_chunk_alloc) {
1794                         do_chunk_alloc(trans, root,
1795                                      num_bytes + 2 * 1024 * 1024, data, 1);
1796                         allowed_chunk_alloc = 0;
1797                 } else if (block_group && block_group_bits(block_group, data)) {
1798                         block_group->space_info->force_alloc = 1;
1799                 }
1800                 chunk_alloc_done = 1;
1801         }
1802         ret = find_search_start(root, &block_group, &search_start,
1803                                 total_needed, data);
1804         if (ret == -ENOSPC && last_ptr && *last_ptr) {
1805                 *last_ptr = 0;
1806                 block_group = btrfs_lookup_first_block_group(info,
1807                                                              orig_search_start);
1808                 search_start = orig_search_start;
1809                 ret = find_search_start(root, &block_group, &search_start,
1810                                         total_needed, data);
1811         }
1812         if (ret == -ENOSPC)
1813                 goto enospc;
1814         if (ret)
1815                 goto error;
1816
1817         if (last_ptr && *last_ptr && search_start != *last_ptr) {
1818                 *last_ptr = 0;
1819                 if (!empty_size) {
1820                         empty_size += empty_cluster;
1821                         total_needed += empty_size;
1822                 }
1823                 block_group = btrfs_lookup_first_block_group(info,
1824                                                        orig_search_start);
1825                 search_start = orig_search_start;
1826                 ret = find_search_start(root, &block_group,
1827                                         &search_start, total_needed, data);
1828                 if (ret == -ENOSPC)
1829                         goto enospc;
1830                 if (ret)
1831                         goto error;
1832         }
1833
1834         search_start = stripe_align(root, search_start);
1835         ins->objectid = search_start;
1836         ins->offset = num_bytes;
1837
1838         if (ins->objectid + num_bytes >= search_end)
1839                 goto enospc;
1840
1841         if (ins->objectid + num_bytes >
1842             block_group->key.objectid + block_group->key.offset) {
1843                 search_start = block_group->key.objectid +
1844                         block_group->key.offset;
1845                 goto new_group;
1846         }
1847
1848         if (test_range_bit(&info->extent_ins, ins->objectid,
1849                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1850                 search_start = ins->objectid + num_bytes;
1851                 goto new_group;
1852         }
1853
1854         if (test_range_bit(&info->pinned_extents, ins->objectid,
1855                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1856                 search_start = ins->objectid + num_bytes;
1857                 goto new_group;
1858         }
1859
1860         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1861             ins->objectid < exclude_start + exclude_nr)) {
1862                 search_start = exclude_start + exclude_nr;
1863                 goto new_group;
1864         }
1865
1866         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1867                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1868                 if (block_group)
1869                         trans->block_group = block_group;
1870         }
1871         ins->offset = num_bytes;
1872         if (last_ptr) {
1873                 *last_ptr = ins->objectid + ins->offset;
1874                 if (*last_ptr ==
1875                     btrfs_super_total_bytes(&root->fs_info->super_copy)) {
1876                         *last_ptr = 0;
1877                 }
1878         }
1879         return 0;
1880
1881 new_group:
1882         if (search_start + num_bytes >= search_end) {
1883 enospc:
1884                 search_start = orig_search_start;
1885                 if (full_scan) {
1886                         ret = -ENOSPC;
1887                         goto error;
1888                 }
1889                 if (wrapped) {
1890                         if (!full_scan)
1891                                 total_needed -= empty_size;
1892                         full_scan = 1;
1893                 } else
1894                         wrapped = 1;
1895         }
1896         block_group = btrfs_lookup_first_block_group(info, search_start);
1897         cond_resched();
1898         block_group = __btrfs_find_block_group(root, block_group,
1899                                              search_start, data, 0);
1900         goto check_failed;
1901
1902 error:
1903         return ret;
1904 }
1905
1906 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1907                                   struct btrfs_root *root,
1908                                   u64 num_bytes, u64 min_alloc_size,
1909                                   u64 empty_size, u64 hint_byte,
1910                                   u64 search_end, struct btrfs_key *ins,
1911                                   u64 data)
1912 {
1913         int ret;
1914         u64 search_start = 0;
1915         u64 alloc_profile;
1916         struct btrfs_fs_info *info = root->fs_info;
1917
1918         if (data) {
1919                 alloc_profile = info->avail_data_alloc_bits &
1920                                 info->data_alloc_profile;
1921                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1922         } else if (root == root->fs_info->chunk_root) {
1923                 alloc_profile = info->avail_system_alloc_bits &
1924                                 info->system_alloc_profile;
1925                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1926         } else {
1927                 alloc_profile = info->avail_metadata_alloc_bits &
1928                                 info->metadata_alloc_profile;
1929                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1930         }
1931 again:
1932         data = reduce_alloc_profile(root, data);
1933         /*
1934          * the only place that sets empty_size is btrfs_realloc_node, which
1935          * is not called recursively on allocations
1936          */
1937         if (empty_size || root->ref_cows) {
1938                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1939                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1940                                      2 * 1024 * 1024,
1941                                      BTRFS_BLOCK_GROUP_METADATA |
1942                                      (info->metadata_alloc_profile &
1943                                       info->avail_metadata_alloc_bits), 0);
1944                         BUG_ON(ret);
1945                 }
1946                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1947                                      num_bytes + 2 * 1024 * 1024, data, 0);
1948                 BUG_ON(ret);
1949         }
1950
1951         WARN_ON(num_bytes < root->sectorsize);
1952         ret = find_free_extent(trans, root, num_bytes, empty_size,
1953                                search_start, search_end, hint_byte, ins,
1954                                trans->alloc_exclude_start,
1955                                trans->alloc_exclude_nr, data);
1956
1957         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
1958                 num_bytes = num_bytes >> 1;
1959                 num_bytes = max(num_bytes, min_alloc_size);
1960                 do_chunk_alloc(trans, root->fs_info->extent_root,
1961                                num_bytes, data, 1);
1962                 goto again;
1963         }
1964         if (ret) {
1965                 printk("allocation failed flags %Lu\n", data);
1966                 BUG();
1967         }
1968         clear_extent_dirty(&root->fs_info->free_space_cache,
1969                            ins->objectid, ins->objectid + ins->offset - 1,
1970                            GFP_NOFS);
1971         return 0;
1972 }
1973
1974 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1975                                   struct btrfs_root *root,
1976                                   u64 num_bytes, u64 min_alloc_size,
1977                                   u64 empty_size, u64 hint_byte,
1978                                   u64 search_end, struct btrfs_key *ins,
1979                                   u64 data)
1980 {
1981         int ret;
1982         maybe_lock_mutex(root);
1983         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
1984                                      empty_size, hint_byte, search_end, ins,
1985                                      data);
1986         maybe_unlock_mutex(root);
1987         return ret;
1988 }
1989
1990 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
1991                                          struct btrfs_root *root,
1992                                          u64 root_objectid, u64 ref_generation,
1993                                          u64 owner, u64 owner_offset,
1994                                          struct btrfs_key *ins)
1995 {
1996         int ret;
1997         int pending_ret;
1998         u64 super_used;
1999         u64 root_used;
2000         u64 num_bytes = ins->offset;
2001         u32 sizes[2];
2002         struct btrfs_fs_info *info = root->fs_info;
2003         struct btrfs_root *extent_root = info->extent_root;
2004         struct btrfs_extent_item *extent_item;
2005         struct btrfs_extent_ref *ref;
2006         struct btrfs_path *path;
2007         struct btrfs_key keys[2];
2008
2009         /* block accounting for super block */
2010         spin_lock_irq(&info->delalloc_lock);
2011         super_used = btrfs_super_bytes_used(&info->super_copy);
2012         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2013         spin_unlock_irq(&info->delalloc_lock);
2014
2015         /* block accounting for root item */
2016         root_used = btrfs_root_used(&root->root_item);
2017         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
2018
2019         if (root == extent_root) {
2020                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2021                                 ins->objectid + ins->offset - 1,
2022                                 EXTENT_LOCKED, GFP_NOFS);
2023                 goto update_block;
2024         }
2025
2026         memcpy(&keys[0], ins, sizeof(*ins));
2027         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
2028                                          owner, owner_offset);
2029         keys[1].objectid = ins->objectid;
2030         keys[1].type = BTRFS_EXTENT_REF_KEY;
2031         sizes[0] = sizeof(*extent_item);
2032         sizes[1] = sizeof(*ref);
2033
2034         path = btrfs_alloc_path();
2035         BUG_ON(!path);
2036
2037         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2038                                        sizes, 2);
2039
2040         BUG_ON(ret);
2041         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2042                                      struct btrfs_extent_item);
2043         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2044         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2045                              struct btrfs_extent_ref);
2046
2047         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2048         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2049         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2050         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
2051
2052         btrfs_mark_buffer_dirty(path->nodes[0]);
2053
2054         trans->alloc_exclude_start = 0;
2055         trans->alloc_exclude_nr = 0;
2056         btrfs_free_path(path);
2057         finish_current_insert(trans, extent_root);
2058         pending_ret = del_pending_extents(trans, extent_root);
2059
2060         if (ret)
2061                 goto out;
2062         if (pending_ret) {
2063                 ret = pending_ret;
2064                 goto out;
2065         }
2066
2067 update_block:
2068         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
2069         if (ret) {
2070                 printk("update block group failed for %Lu %Lu\n",
2071                        ins->objectid, ins->offset);
2072                 BUG();
2073         }
2074 out:
2075         return ret;
2076 }
2077
2078 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2079                                 struct btrfs_root *root,
2080                                 u64 root_objectid, u64 ref_generation,
2081                                 u64 owner, u64 owner_offset,
2082                                 struct btrfs_key *ins)
2083 {
2084         int ret;
2085         maybe_lock_mutex(root);
2086         ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2087                                             ref_generation, owner,
2088                                             owner_offset, ins);
2089         maybe_unlock_mutex(root);
2090         return ret;
2091 }
2092 /*
2093  * finds a free extent and does all the dirty work required for allocation
2094  * returns the key for the extent through ins, and a tree buffer for
2095  * the first block of the extent through buf.
2096  *
2097  * returns 0 if everything worked, non-zero otherwise.
2098  */
2099 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2100                        struct btrfs_root *root,
2101                        u64 num_bytes, u64 min_alloc_size,
2102                        u64 root_objectid, u64 ref_generation,
2103                        u64 owner, u64 owner_offset,
2104                        u64 empty_size, u64 hint_byte,
2105                        u64 search_end, struct btrfs_key *ins, u64 data)
2106 {
2107         int ret;
2108
2109         maybe_lock_mutex(root);
2110
2111         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2112                                      min_alloc_size, empty_size, hint_byte,
2113                                      search_end, ins, data);
2114         BUG_ON(ret);
2115         ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2116                                             ref_generation, owner,
2117                                             owner_offset, ins);
2118         BUG_ON(ret);
2119
2120         maybe_unlock_mutex(root);
2121         return ret;
2122 }
2123 /*
2124  * helper function to allocate a block for a given tree
2125  * returns the tree buffer or NULL.
2126  */
2127 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2128                                              struct btrfs_root *root,
2129                                              u32 blocksize,
2130                                              u64 root_objectid,
2131                                              u64 ref_generation,
2132                                              u64 first_objectid,
2133                                              int level,
2134                                              u64 hint,
2135                                              u64 empty_size)
2136 {
2137         struct btrfs_key ins;
2138         int ret;
2139         struct extent_buffer *buf;
2140
2141         ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
2142                                  root_objectid, ref_generation,
2143                                  level, first_objectid, empty_size, hint,
2144                                  (u64)-1, &ins, 0);
2145         if (ret) {
2146                 BUG_ON(ret > 0);
2147                 return ERR_PTR(ret);
2148         }
2149         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
2150         if (!buf) {
2151                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
2152                                   root->root_key.objectid, ref_generation,
2153                                   0, 0, 0);
2154                 return ERR_PTR(-ENOMEM);
2155         }
2156         btrfs_set_header_generation(buf, trans->transid);
2157         btrfs_tree_lock(buf);
2158         clean_tree_block(trans, root, buf);
2159         btrfs_set_buffer_uptodate(buf);
2160
2161         if (PageDirty(buf->first_page)) {
2162                 printk("page %lu dirty\n", buf->first_page->index);
2163                 WARN_ON(1);
2164         }
2165
2166         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2167                          buf->start + buf->len - 1, GFP_NOFS);
2168         trans->blocks_used++;
2169         return buf;
2170 }
2171
2172 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2173                                   struct btrfs_root *root,
2174                                   struct extent_buffer *leaf)
2175 {
2176         u64 leaf_owner;
2177         u64 leaf_generation;
2178         struct btrfs_key key;
2179         struct btrfs_file_extent_item *fi;
2180         int i;
2181         int nritems;
2182         int ret;
2183
2184         BUG_ON(!btrfs_is_leaf(leaf));
2185         nritems = btrfs_header_nritems(leaf);
2186         leaf_owner = btrfs_header_owner(leaf);
2187         leaf_generation = btrfs_header_generation(leaf);
2188
2189         mutex_unlock(&root->fs_info->alloc_mutex);
2190
2191         for (i = 0; i < nritems; i++) {
2192                 u64 disk_bytenr;
2193                 cond_resched();
2194
2195                 btrfs_item_key_to_cpu(leaf, &key, i);
2196                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2197                         continue;
2198                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2199                 if (btrfs_file_extent_type(leaf, fi) ==
2200                     BTRFS_FILE_EXTENT_INLINE)
2201                         continue;
2202                 /*
2203                  * FIXME make sure to insert a trans record that
2204                  * repeats the snapshot del on crash
2205                  */
2206                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2207                 if (disk_bytenr == 0)
2208                         continue;
2209
2210                 mutex_lock(&root->fs_info->alloc_mutex);
2211                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2212                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2213                                 leaf_owner, leaf_generation,
2214                                 key.objectid, key.offset, 0);
2215                 mutex_unlock(&root->fs_info->alloc_mutex);
2216                 BUG_ON(ret);
2217         }
2218
2219         mutex_lock(&root->fs_info->alloc_mutex);
2220         return 0;
2221 }
2222
2223 static void noinline reada_walk_down(struct btrfs_root *root,
2224                                      struct extent_buffer *node,
2225                                      int slot)
2226 {
2227         u64 bytenr;
2228         u64 last = 0;
2229         u32 nritems;
2230         u32 refs;
2231         u32 blocksize;
2232         int ret;
2233         int i;
2234         int level;
2235         int skipped = 0;
2236
2237         nritems = btrfs_header_nritems(node);
2238         level = btrfs_header_level(node);
2239         if (level)
2240                 return;
2241
2242         for (i = slot; i < nritems && skipped < 32; i++) {
2243                 bytenr = btrfs_node_blockptr(node, i);
2244                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
2245                              (last > bytenr && last - bytenr > 32 * 1024))) {
2246                         skipped++;
2247                         continue;
2248                 }
2249                 blocksize = btrfs_level_size(root, level - 1);
2250                 if (i != slot) {
2251                         ret = lookup_extent_ref(NULL, root, bytenr,
2252                                                 blocksize, &refs);
2253                         BUG_ON(ret);
2254                         if (refs != 1) {
2255                                 skipped++;
2256                                 continue;
2257                         }
2258                 }
2259                 ret = readahead_tree_block(root, bytenr, blocksize,
2260                                            btrfs_node_ptr_generation(node, i));
2261                 last = bytenr + blocksize;
2262                 cond_resched();
2263                 if (ret)
2264                         break;
2265         }
2266 }
2267
2268 /*
2269  * we want to avoid as much random IO as we can with the alloc mutex
2270  * held, so drop the lock and do the lookup, then do it again with the
2271  * lock held.
2272  */
2273 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2274                               u32 *refs)
2275 {
2276         mutex_unlock(&root->fs_info->alloc_mutex);
2277         lookup_extent_ref(NULL, root, start, len, refs);
2278         cond_resched();
2279         mutex_lock(&root->fs_info->alloc_mutex);
2280         return lookup_extent_ref(NULL, root, start, len, refs);
2281 }
2282
2283 /*
2284  * helper function for drop_snapshot, this walks down the tree dropping ref
2285  * counts as it goes.
2286  */
2287 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2288                                    struct btrfs_root *root,
2289                                    struct btrfs_path *path, int *level)
2290 {
2291         u64 root_owner;
2292         u64 root_gen;
2293         u64 bytenr;
2294         u64 ptr_gen;
2295         struct extent_buffer *next;
2296         struct extent_buffer *cur;
2297         struct extent_buffer *parent;
2298         u32 blocksize;
2299         int ret;
2300         u32 refs;
2301
2302         mutex_lock(&root->fs_info->alloc_mutex);
2303
2304         WARN_ON(*level < 0);
2305         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2306         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
2307                                 path->nodes[*level]->len, &refs);
2308         BUG_ON(ret);
2309         if (refs > 1)
2310                 goto out;
2311
2312         /*
2313          * walk down to the last node level and free all the leaves
2314          */
2315         while(*level >= 0) {
2316                 WARN_ON(*level < 0);
2317                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2318                 cur = path->nodes[*level];
2319
2320                 if (btrfs_header_level(cur) != *level)
2321                         WARN_ON(1);
2322
2323                 if (path->slots[*level] >=
2324                     btrfs_header_nritems(cur))
2325                         break;
2326                 if (*level == 0) {
2327                         ret = drop_leaf_ref(trans, root, cur);
2328                         BUG_ON(ret);
2329                         break;
2330                 }
2331                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2332                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2333                 blocksize = btrfs_level_size(root, *level - 1);
2334
2335                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
2336                 BUG_ON(ret);
2337                 if (refs != 1) {
2338                         parent = path->nodes[*level];
2339                         root_owner = btrfs_header_owner(parent);
2340                         root_gen = btrfs_header_generation(parent);
2341                         path->slots[*level]++;
2342                         ret = __btrfs_free_extent(trans, root, bytenr,
2343                                                 blocksize, root_owner,
2344                                                 root_gen, 0, 0, 1);
2345                         BUG_ON(ret);
2346                         continue;
2347                 }
2348                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2349                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2350                         free_extent_buffer(next);
2351                         mutex_unlock(&root->fs_info->alloc_mutex);
2352
2353                         if (path->slots[*level] == 0)
2354                                 reada_walk_down(root, cur, path->slots[*level]);
2355
2356                         next = read_tree_block(root, bytenr, blocksize,
2357                                                ptr_gen);
2358                         cond_resched();
2359                         mutex_lock(&root->fs_info->alloc_mutex);
2360
2361                         /* we've dropped the lock, double check */
2362                         ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2363                                                 &refs);
2364                         BUG_ON(ret);
2365                         if (refs != 1) {
2366                                 parent = path->nodes[*level];
2367                                 root_owner = btrfs_header_owner(parent);
2368                                 root_gen = btrfs_header_generation(parent);
2369
2370                                 path->slots[*level]++;
2371                                 free_extent_buffer(next);
2372                                 ret = __btrfs_free_extent(trans, root, bytenr,
2373                                                         blocksize,
2374                                                         root_owner,
2375                                                         root_gen, 0, 0, 1);
2376                                 BUG_ON(ret);
2377                                 continue;
2378                         }
2379                 }
2380                 WARN_ON(*level <= 0);
2381                 if (path->nodes[*level-1])
2382                         free_extent_buffer(path->nodes[*level-1]);
2383                 path->nodes[*level-1] = next;
2384                 *level = btrfs_header_level(next);
2385                 path->slots[*level] = 0;
2386         }
2387 out:
2388         WARN_ON(*level < 0);
2389         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2390
2391         if (path->nodes[*level] == root->node) {
2392                 root_owner = root->root_key.objectid;
2393                 parent = path->nodes[*level];
2394         } else {
2395                 parent = path->nodes[*level + 1];
2396                 root_owner = btrfs_header_owner(parent);
2397         }
2398
2399         root_gen = btrfs_header_generation(parent);
2400         ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start,
2401                                 path->nodes[*level]->len,
2402                                 root_owner, root_gen, 0, 0, 1);
2403         free_extent_buffer(path->nodes[*level]);
2404         path->nodes[*level] = NULL;
2405         *level += 1;
2406         BUG_ON(ret);
2407         mutex_unlock(&root->fs_info->alloc_mutex);
2408         cond_resched();
2409         return 0;
2410 }
2411
2412 /*
2413  * helper for dropping snapshots.  This walks back up the tree in the path
2414  * to find the first node higher up where we haven't yet gone through
2415  * all the slots
2416  */
2417 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2418                                  struct btrfs_root *root,
2419                                  struct btrfs_path *path, int *level)
2420 {
2421         u64 root_owner;
2422         u64 root_gen;
2423         struct btrfs_root_item *root_item = &root->root_item;
2424         int i;
2425         int slot;
2426         int ret;
2427
2428         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2429                 slot = path->slots[i];
2430                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2431                         struct extent_buffer *node;
2432                         struct btrfs_disk_key disk_key;
2433                         node = path->nodes[i];
2434                         path->slots[i]++;
2435                         *level = i;
2436                         WARN_ON(*level == 0);
2437                         btrfs_node_key(node, &disk_key, path->slots[i]);
2438                         memcpy(&root_item->drop_progress,
2439                                &disk_key, sizeof(disk_key));
2440                         root_item->drop_level = i;
2441                         return 0;
2442                 } else {
2443                         if (path->nodes[*level] == root->node) {
2444                                 root_owner = root->root_key.objectid;
2445                                 root_gen =
2446                                    btrfs_header_generation(path->nodes[*level]);
2447                         } else {
2448                                 struct extent_buffer *node;
2449                                 node = path->nodes[*level + 1];
2450                                 root_owner = btrfs_header_owner(node);
2451                                 root_gen = btrfs_header_generation(node);
2452                         }
2453                         ret = btrfs_free_extent(trans, root,
2454                                                 path->nodes[*level]->start,
2455                                                 path->nodes[*level]->len,
2456                                                 root_owner, root_gen, 0, 0, 1);
2457                         BUG_ON(ret);
2458                         free_extent_buffer(path->nodes[*level]);
2459                         path->nodes[*level] = NULL;
2460                         *level = i + 1;
2461                 }
2462         }
2463         return 1;
2464 }
2465
2466 /*
2467  * drop the reference count on the tree rooted at 'snap'.  This traverses
2468  * the tree freeing any blocks that have a ref count of zero after being
2469  * decremented.
2470  */
2471 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2472                         *root)
2473 {
2474         int ret = 0;
2475         int wret;
2476         int level;
2477         struct btrfs_path *path;
2478         int i;
2479         int orig_level;
2480         struct btrfs_root_item *root_item = &root->root_item;
2481
2482         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
2483         path = btrfs_alloc_path();
2484         BUG_ON(!path);
2485
2486         level = btrfs_header_level(root->node);
2487         orig_level = level;
2488         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2489                 path->nodes[level] = root->node;
2490                 extent_buffer_get(root->node);
2491                 path->slots[level] = 0;
2492         } else {
2493                 struct btrfs_key key;
2494                 struct btrfs_disk_key found_key;
2495                 struct extent_buffer *node;
2496
2497                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2498                 level = root_item->drop_level;
2499                 path->lowest_level = level;
2500                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2501                 if (wret < 0) {
2502                         ret = wret;
2503                         goto out;
2504                 }
2505                 node = path->nodes[level];
2506                 btrfs_node_key(node, &found_key, path->slots[level]);
2507                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2508                                sizeof(found_key)));
2509                 /*
2510                  * unlock our path, this is safe because only this
2511                  * function is allowed to delete this snapshot
2512                  */
2513                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
2514                         if (path->nodes[i] && path->locks[i]) {
2515                                 path->locks[i] = 0;
2516                                 btrfs_tree_unlock(path->nodes[i]);
2517                         }
2518                 }
2519         }
2520         while(1) {
2521                 wret = walk_down_tree(trans, root, path, &level);
2522                 if (wret > 0)
2523                         break;
2524                 if (wret < 0)
2525                         ret = wret;
2526
2527                 wret = walk_up_tree(trans, root, path, &level);
2528                 if (wret > 0)
2529                         break;
2530                 if (wret < 0)
2531                         ret = wret;
2532                 if (trans->transaction->in_commit) {
2533                         ret = -EAGAIN;
2534                         break;
2535                 }
2536         }
2537         for (i = 0; i <= orig_level; i++) {
2538                 if (path->nodes[i]) {
2539                         free_extent_buffer(path->nodes[i]);
2540                         path->nodes[i] = NULL;
2541                 }
2542         }
2543 out:
2544         btrfs_free_path(path);
2545         return ret;
2546 }
2547
2548 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2549 {
2550         u64 start;
2551         u64 end;
2552         u64 ptr;
2553         int ret;
2554
2555         mutex_lock(&info->alloc_mutex);
2556         while(1) {
2557                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2558                                             &start, &end, (unsigned int)-1);
2559                 if (ret)
2560                         break;
2561                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2562                 if (!ret)
2563                         kfree((void *)(unsigned long)ptr);
2564                 clear_extent_bits(&info->block_group_cache, start,
2565                                   end, (unsigned int)-1, GFP_NOFS);
2566         }
2567         while(1) {
2568                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2569                                             &start, &end, EXTENT_DIRTY);
2570                 if (ret)
2571                         break;
2572                 clear_extent_dirty(&info->free_space_cache, start,
2573                                    end, GFP_NOFS);
2574         }
2575         mutex_unlock(&info->alloc_mutex);
2576         return 0;
2577 }
2578
2579 static unsigned long calc_ra(unsigned long start, unsigned long last,
2580                              unsigned long nr)
2581 {
2582         return min(last, start + nr - 1);
2583 }
2584
2585 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2586                                          u64 len)
2587 {
2588         u64 page_start;
2589         u64 page_end;
2590         unsigned long last_index;
2591         unsigned long i;
2592         struct page *page;
2593         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2594         struct file_ra_state *ra;
2595         unsigned long total_read = 0;
2596         unsigned long ra_pages;
2597         struct btrfs_trans_handle *trans;
2598
2599         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2600
2601         mutex_lock(&inode->i_mutex);
2602         i = start >> PAGE_CACHE_SHIFT;
2603         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2604
2605         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2606
2607         file_ra_state_init(ra, inode->i_mapping);
2608
2609         for (; i <= last_index; i++) {
2610                 if (total_read % ra_pages == 0) {
2611                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2612                                        calc_ra(i, last_index, ra_pages));
2613                 }
2614                 total_read++;
2615                 if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size)
2616                         goto truncate_racing;
2617
2618                 page = grab_cache_page(inode->i_mapping, i);
2619                 if (!page) {
2620                         goto out_unlock;
2621                 }
2622                 if (!PageUptodate(page)) {
2623                         btrfs_readpage(NULL, page);
2624                         lock_page(page);
2625                         if (!PageUptodate(page)) {
2626                                 unlock_page(page);
2627                                 page_cache_release(page);
2628                                 goto out_unlock;
2629                         }
2630                 }
2631 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2632                 ClearPageDirty(page);
2633 #else
2634                 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2635 #endif
2636                 wait_on_page_writeback(page);
2637                 set_page_extent_mapped(page);
2638                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2639                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2640
2641                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2642
2643                 set_extent_delalloc(io_tree, page_start,
2644                                     page_end, GFP_NOFS);
2645                 set_page_dirty(page);
2646
2647                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2648                 unlock_page(page);
2649                 page_cache_release(page);
2650         }
2651         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2652                                            total_read);
2653
2654 out_unlock:
2655         kfree(ra);
2656         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2657         if (trans) {
2658                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2659                 mark_inode_dirty(inode);
2660         }
2661         mutex_unlock(&inode->i_mutex);
2662         return 0;
2663
2664 truncate_racing:
2665         vmtruncate(inode, inode->i_size);
2666         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2667                                            total_read);
2668         goto out_unlock;
2669 }
2670
2671 /*
2672  * The back references tell us which tree holds a ref on a block,
2673  * but it is possible for the tree root field in the reference to
2674  * reflect the original root before a snapshot was made.  In this
2675  * case we should search through all the children of a given root
2676  * to find potential holders of references on a block.
2677  *
2678  * Instead, we do something a little less fancy and just search
2679  * all the roots for a given key/block combination.
2680  */
2681 static int find_root_for_ref(struct btrfs_root *root,
2682                              struct btrfs_path *path,
2683                              struct btrfs_key *key0,
2684                              int level,
2685                              int file_key,
2686                              struct btrfs_root **found_root,
2687                              u64 bytenr)
2688 {
2689         struct btrfs_key root_location;
2690         struct btrfs_root *cur_root = *found_root;
2691         struct btrfs_file_extent_item *file_extent;
2692         u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
2693         u64 found_bytenr;
2694         int ret;
2695
2696         root_location.offset = (u64)-1;
2697         root_location.type = BTRFS_ROOT_ITEM_KEY;
2698         path->lowest_level = level;
2699         path->reada = 0;
2700         while(1) {
2701                 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
2702                 found_bytenr = 0;
2703                 if (ret == 0 && file_key) {
2704                         struct extent_buffer *leaf = path->nodes[0];
2705                         file_extent = btrfs_item_ptr(leaf, path->slots[0],
2706                                              struct btrfs_file_extent_item);
2707                         if (btrfs_file_extent_type(leaf, file_extent) ==
2708                             BTRFS_FILE_EXTENT_REG) {
2709                                 found_bytenr =
2710                                         btrfs_file_extent_disk_bytenr(leaf,
2711                                                                file_extent);
2712                        }
2713                 } else if (!file_key) {
2714                         if (path->nodes[level])
2715                                 found_bytenr = path->nodes[level]->start;
2716                 }
2717
2718                 btrfs_release_path(cur_root, path);
2719
2720                 if (found_bytenr == bytenr) {
2721                         *found_root = cur_root;
2722                         ret = 0;
2723                         goto out;
2724                 }
2725                 ret = btrfs_search_root(root->fs_info->tree_root,
2726                                         root_search_start, &root_search_start);
2727                 if (ret)
2728                         break;
2729
2730                 root_location.objectid = root_search_start;
2731                 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
2732                                                       &root_location);
2733                 if (!cur_root) {
2734                         ret = 1;
2735                         break;
2736                 }
2737         }
2738 out:
2739         path->lowest_level = 0;
2740         return ret;
2741 }
2742
2743 /*
2744  * note, this releases the path
2745  */
2746 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2747                                   struct btrfs_path *path,
2748                                   struct btrfs_key *extent_key,
2749                                   u64 *last_file_objectid,
2750                                   u64 *last_file_offset,
2751                                   u64 *last_file_root,
2752                                   u64 last_extent)
2753 {
2754         struct inode *inode;
2755         struct btrfs_root *found_root;
2756         struct btrfs_key root_location;
2757         struct btrfs_key found_key;
2758         struct btrfs_extent_ref *ref;
2759         u64 ref_root;
2760         u64 ref_gen;
2761         u64 ref_objectid;
2762         u64 ref_offset;
2763         int ret;
2764         int level;
2765
2766         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2767
2768         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2769                              struct btrfs_extent_ref);
2770         ref_root = btrfs_ref_root(path->nodes[0], ref);
2771         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2772         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2773         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2774         btrfs_release_path(extent_root, path);
2775
2776         root_location.objectid = ref_root;
2777         if (ref_gen == 0)
2778                 root_location.offset = 0;
2779         else
2780                 root_location.offset = (u64)-1;
2781         root_location.type = BTRFS_ROOT_ITEM_KEY;
2782
2783         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2784                                                 &root_location);
2785         BUG_ON(!found_root);
2786         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2787
2788         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2789                 found_key.objectid = ref_objectid;
2790                 found_key.type = BTRFS_EXTENT_DATA_KEY;
2791                 found_key.offset = ref_offset;
2792                 level = 0;
2793
2794                 if (last_extent == extent_key->objectid &&
2795                     *last_file_objectid == ref_objectid &&
2796                     *last_file_offset == ref_offset &&
2797                     *last_file_root == ref_root)
2798                         goto out;
2799
2800                 ret = find_root_for_ref(extent_root, path, &found_key,
2801                                         level, 1, &found_root,
2802                                         extent_key->objectid);
2803
2804                 if (ret)
2805                         goto out;
2806
2807                 if (last_extent == extent_key->objectid &&
2808                     *last_file_objectid == ref_objectid &&
2809                     *last_file_offset == ref_offset &&
2810                     *last_file_root == ref_root)
2811                         goto out;
2812
2813                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2814                                           ref_objectid, found_root);
2815                 if (inode->i_state & I_NEW) {
2816                         /* the inode and parent dir are two different roots */
2817                         BTRFS_I(inode)->root = found_root;
2818                         BTRFS_I(inode)->location.objectid = ref_objectid;
2819                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2820                         BTRFS_I(inode)->location.offset = 0;
2821                         btrfs_read_locked_inode(inode);
2822                         unlock_new_inode(inode);
2823
2824                 }
2825                 /* this can happen if the reference is not against
2826                  * the latest version of the tree root
2827                  */
2828                 if (is_bad_inode(inode))
2829                         goto out;
2830
2831                 *last_file_objectid = inode->i_ino;
2832                 *last_file_root = found_root->root_key.objectid;
2833                 *last_file_offset = ref_offset;
2834
2835                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2836                 iput(inode);
2837         } else {
2838                 struct btrfs_trans_handle *trans;
2839                 struct extent_buffer *eb;
2840                 int needs_lock = 0;
2841
2842                 eb = read_tree_block(found_root, extent_key->objectid,
2843                                      extent_key->offset, 0);
2844                 btrfs_tree_lock(eb);
2845                 level = btrfs_header_level(eb);
2846
2847                 if (level == 0)
2848                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2849                 else
2850                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2851
2852                 btrfs_tree_unlock(eb);
2853                 free_extent_buffer(eb);
2854
2855                 ret = find_root_for_ref(extent_root, path, &found_key,
2856                                         level, 0, &found_root,
2857                                         extent_key->objectid);
2858
2859                 if (ret)
2860                         goto out;
2861
2862                 /*
2863                  * right here almost anything could happen to our key,
2864                  * but that's ok.  The cow below will either relocate it
2865                  * or someone else will have relocated it.  Either way,
2866                  * it is in a different spot than it was before and
2867                  * we're happy.
2868                  */
2869
2870                 trans = btrfs_start_transaction(found_root, 1);
2871
2872                 if (found_root == extent_root->fs_info->extent_root ||
2873                     found_root == extent_root->fs_info->chunk_root ||
2874                     found_root == extent_root->fs_info->dev_root) {
2875                         needs_lock = 1;
2876                         mutex_lock(&extent_root->fs_info->alloc_mutex);
2877                 }
2878
2879                 path->lowest_level = level;
2880                 path->reada = 2;
2881                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2882                                         0, 1);
2883                 path->lowest_level = 0;
2884                 btrfs_release_path(found_root, path);
2885
2886                 if (found_root == found_root->fs_info->extent_root)
2887                         btrfs_extent_post_op(trans, found_root);
2888                 if (needs_lock)
2889                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2890
2891                 btrfs_end_transaction(trans, found_root);
2892
2893         }
2894 out:
2895         mutex_lock(&extent_root->fs_info->alloc_mutex);
2896         return 0;
2897 }
2898
2899 static int noinline del_extent_zero(struct btrfs_root *extent_root,
2900                                     struct btrfs_path *path,
2901                                     struct btrfs_key *extent_key)
2902 {
2903         int ret;
2904         struct btrfs_trans_handle *trans;
2905
2906         trans = btrfs_start_transaction(extent_root, 1);
2907         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
2908         if (ret > 0) {
2909                 ret = -EIO;
2910                 goto out;
2911         }
2912         if (ret < 0)
2913                 goto out;
2914         ret = btrfs_del_item(trans, extent_root, path);
2915 out:
2916         btrfs_end_transaction(trans, extent_root);
2917         return ret;
2918 }
2919
2920 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2921                                         struct btrfs_path *path,
2922                                         struct btrfs_key *extent_key)
2923 {
2924         struct btrfs_key key;
2925         struct btrfs_key found_key;
2926         struct extent_buffer *leaf;
2927         u64 last_file_objectid = 0;
2928         u64 last_file_root = 0;
2929         u64 last_file_offset = (u64)-1;
2930         u64 last_extent = 0;
2931         u32 nritems;
2932         u32 item_size;
2933         int ret = 0;
2934
2935         if (extent_key->objectid == 0) {
2936                 ret = del_extent_zero(extent_root, path, extent_key);
2937                 goto out;
2938         }
2939         key.objectid = extent_key->objectid;
2940         key.type = BTRFS_EXTENT_REF_KEY;
2941         key.offset = 0;
2942
2943         while(1) {
2944                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2945
2946                 if (ret < 0)
2947                         goto out;
2948
2949                 ret = 0;
2950                 leaf = path->nodes[0];
2951                 nritems = btrfs_header_nritems(leaf);
2952                 if (path->slots[0] == nritems) {
2953                         ret = btrfs_next_leaf(extent_root, path);
2954                         if (ret > 0) {
2955                                 ret = 0;
2956                                 goto out;
2957                         }
2958                         if (ret < 0)
2959                                 goto out;
2960                         leaf = path->nodes[0];
2961                 }
2962
2963                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2964                 if (found_key.objectid != extent_key->objectid) {
2965                         break;
2966                 }
2967
2968                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
2969                         break;
2970                 }
2971
2972                 key.offset = found_key.offset + 1;
2973                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2974
2975                 ret = relocate_one_reference(extent_root, path, extent_key,
2976                                              &last_file_objectid,
2977                                              &last_file_offset,
2978                                              &last_file_root, last_extent);
2979                 if (ret)
2980                         goto out;
2981                 last_extent = extent_key->objectid;
2982         }
2983         ret = 0;
2984 out:
2985         btrfs_release_path(extent_root, path);
2986         return ret;
2987 }
2988
2989 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
2990 {
2991         u64 num_devices;
2992         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
2993                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
2994
2995         num_devices = root->fs_info->fs_devices->num_devices;
2996         if (num_devices == 1) {
2997                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2998                 stripped = flags & ~stripped;
2999
3000                 /* turn raid0 into single device chunks */
3001                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
3002                         return stripped;
3003
3004                 /* turn mirroring into duplication */
3005                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3006                              BTRFS_BLOCK_GROUP_RAID10))
3007                         return stripped | BTRFS_BLOCK_GROUP_DUP;
3008                 return flags;
3009         } else {
3010                 /* they already had raid on here, just return */
3011                 if (flags & stripped)
3012                         return flags;
3013
3014                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3015                 stripped = flags & ~stripped;
3016
3017                 /* switch duplicated blocks with raid1 */
3018                 if (flags & BTRFS_BLOCK_GROUP_DUP)
3019                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
3020
3021                 /* turn single device chunks into raid0 */
3022                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
3023         }
3024         return flags;
3025 }
3026
3027 int __alloc_chunk_for_shrink(struct btrfs_root *root,
3028                      struct btrfs_block_group_cache *shrink_block_group,
3029                      int force)
3030 {
3031         struct btrfs_trans_handle *trans;
3032         u64 new_alloc_flags;
3033         u64 calc;
3034
3035         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
3036
3037                 mutex_unlock(&root->fs_info->alloc_mutex);
3038                 trans = btrfs_start_transaction(root, 1);
3039                 mutex_lock(&root->fs_info->alloc_mutex);
3040
3041                 new_alloc_flags = update_block_group_flags(root,
3042                                                    shrink_block_group->flags);
3043                 if (new_alloc_flags != shrink_block_group->flags) {
3044                         calc =
3045                              btrfs_block_group_used(&shrink_block_group->item);
3046                 } else {
3047                         calc = shrink_block_group->key.offset;
3048                 }
3049                 do_chunk_alloc(trans, root->fs_info->extent_root,
3050                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
3051
3052                 mutex_unlock(&root->fs_info->alloc_mutex);
3053                 btrfs_end_transaction(trans, root);
3054                 mutex_lock(&root->fs_info->alloc_mutex);
3055         }
3056         return 0;
3057 }
3058
3059 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
3060 {
3061         struct btrfs_trans_handle *trans;
3062         struct btrfs_root *tree_root = root->fs_info->tree_root;
3063         struct btrfs_path *path;
3064         u64 cur_byte;
3065         u64 total_found;
3066         u64 shrink_last_byte;
3067         struct btrfs_block_group_cache *shrink_block_group;
3068         struct btrfs_fs_info *info = root->fs_info;
3069         struct btrfs_key key;
3070         struct btrfs_key found_key;
3071         struct extent_buffer *leaf;
3072         u32 nritems;
3073         int ret;
3074         int progress;
3075
3076         mutex_lock(&root->fs_info->alloc_mutex);
3077         shrink_block_group = btrfs_lookup_block_group(root->fs_info,
3078                                                       shrink_start);
3079         BUG_ON(!shrink_block_group);
3080
3081         shrink_last_byte = shrink_block_group->key.objectid +
3082                 shrink_block_group->key.offset;
3083
3084         shrink_block_group->space_info->total_bytes -=
3085                 shrink_block_group->key.offset;
3086         path = btrfs_alloc_path();
3087         root = root->fs_info->extent_root;
3088         path->reada = 2;
3089
3090         printk("btrfs relocating block group %llu flags %llu\n",
3091                (unsigned long long)shrink_start,
3092                (unsigned long long)shrink_block_group->flags);
3093
3094         __alloc_chunk_for_shrink(root, shrink_block_group, 1);
3095
3096 again:
3097
3098         shrink_block_group->ro = 1;
3099
3100         total_found = 0;
3101         progress = 0;
3102         key.objectid = shrink_start;
3103         key.offset = 0;
3104         key.type = 0;
3105         cur_byte = key.objectid;
3106
3107         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3108         if (ret < 0)
3109                 goto out;
3110
3111         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
3112         if (ret < 0)
3113                 goto out;
3114
3115         if (ret == 0) {
3116                 leaf = path->nodes[0];
3117                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3118                 if (found_key.objectid + found_key.offset > shrink_start &&
3119                     found_key.objectid < shrink_last_byte) {
3120                         cur_byte = found_key.objectid;
3121                         key.objectid = cur_byte;
3122                 }
3123         }
3124         btrfs_release_path(root, path);
3125
3126         while(1) {
3127                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3128                 if (ret < 0)
3129                         goto out;
3130
3131 next:
3132                 leaf = path->nodes[0];
3133                 nritems = btrfs_header_nritems(leaf);
3134                 if (path->slots[0] >= nritems) {
3135                         ret = btrfs_next_leaf(root, path);
3136                         if (ret < 0)
3137                                 goto out;
3138                         if (ret == 1) {
3139                                 ret = 0;
3140                                 break;
3141                         }
3142                         leaf = path->nodes[0];
3143                         nritems = btrfs_header_nritems(leaf);
3144                 }
3145
3146                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3147
3148                 if (found_key.objectid >= shrink_last_byte)
3149                         break;
3150
3151                 if (progress && need_resched()) {
3152                         memcpy(&key, &found_key, sizeof(key));
3153                         cond_resched();
3154                         btrfs_release_path(root, path);
3155                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
3156                         progress = 0;
3157                         goto next;
3158                 }
3159                 progress = 1;
3160
3161                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
3162                     found_key.objectid + found_key.offset <= cur_byte) {
3163                         memcpy(&key, &found_key, sizeof(key));
3164                         key.offset++;
3165                         path->slots[0]++;
3166                         goto next;
3167                 }
3168
3169                 total_found++;
3170                 cur_byte = found_key.objectid + found_key.offset;
3171                 key.objectid = cur_byte;
3172                 btrfs_release_path(root, path);
3173                 ret = relocate_one_extent(root, path, &found_key);
3174                 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
3175         }
3176
3177         btrfs_release_path(root, path);
3178
3179         if (total_found > 0) {
3180                 printk("btrfs relocate found %llu last extent was %llu\n",
3181                        (unsigned long long)total_found,
3182                        (unsigned long long)found_key.objectid);
3183                 mutex_unlock(&root->fs_info->alloc_mutex);
3184                 trans = btrfs_start_transaction(tree_root, 1);
3185                 btrfs_commit_transaction(trans, tree_root);
3186
3187                 btrfs_clean_old_snapshots(tree_root);
3188
3189                 trans = btrfs_start_transaction(tree_root, 1);
3190                 btrfs_commit_transaction(trans, tree_root);
3191                 mutex_lock(&root->fs_info->alloc_mutex);
3192                 goto again;
3193         }
3194
3195         /*
3196          * we've freed all the extents, now remove the block
3197          * group item from the tree
3198          */
3199         mutex_unlock(&root->fs_info->alloc_mutex);
3200
3201         trans = btrfs_start_transaction(root, 1);
3202         mutex_lock(&root->fs_info->alloc_mutex);
3203         memcpy(&key, &shrink_block_group->key, sizeof(key));
3204
3205         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3206         if (ret > 0)
3207                 ret = -EIO;
3208         if (ret < 0)
3209                 goto out;
3210
3211         clear_extent_bits(&info->block_group_cache, key.objectid,
3212                           key.objectid + key.offset - 1,
3213                           (unsigned int)-1, GFP_NOFS);
3214
3215
3216         clear_extent_bits(&info->free_space_cache,
3217                            key.objectid, key.objectid + key.offset - 1,
3218                            (unsigned int)-1, GFP_NOFS);
3219
3220         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3221         kfree(shrink_block_group);
3222
3223         btrfs_del_item(trans, root, path);
3224         btrfs_release_path(root, path);
3225         mutex_unlock(&root->fs_info->alloc_mutex);
3226         btrfs_commit_transaction(trans, root);
3227
3228         mutex_lock(&root->fs_info->alloc_mutex);
3229
3230         /* the code to unpin extents might set a few bits in the free
3231          * space cache for this range again
3232          */
3233         clear_extent_bits(&info->free_space_cache,
3234                            key.objectid, key.objectid + key.offset - 1,
3235                            (unsigned int)-1, GFP_NOFS);
3236 out:
3237         btrfs_free_path(path);
3238         mutex_unlock(&root->fs_info->alloc_mutex);
3239         return ret;
3240 }
3241
3242 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3243                            struct btrfs_key *key)
3244 {
3245         int ret = 0;
3246         struct btrfs_key found_key;
3247         struct extent_buffer *leaf;
3248         int slot;
3249
3250         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3251         if (ret < 0)
3252                 goto out;
3253
3254         while(1) {
3255                 slot = path->slots[0];
3256                 leaf = path->nodes[0];
3257                 if (slot >= btrfs_header_nritems(leaf)) {
3258                         ret = btrfs_next_leaf(root, path);
3259                         if (ret == 0)
3260                                 continue;
3261                         if (ret < 0)
3262                                 goto out;
3263                         break;
3264                 }
3265                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3266
3267                 if (found_key.objectid >= key->objectid &&
3268                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3269                         ret = 0;
3270                         goto out;
3271                 }
3272                 path->slots[0]++;
3273         }
3274         ret = -ENOENT;
3275 out:
3276         return ret;
3277 }
3278
3279 int btrfs_read_block_groups(struct btrfs_root *root)
3280 {
3281         struct btrfs_path *path;
3282         int ret;
3283         int bit;
3284         struct btrfs_block_group_cache *cache;
3285         struct btrfs_fs_info *info = root->fs_info;
3286         struct btrfs_space_info *space_info;
3287         struct extent_io_tree *block_group_cache;
3288         struct btrfs_key key;
3289         struct btrfs_key found_key;
3290         struct extent_buffer *leaf;
3291
3292         block_group_cache = &info->block_group_cache;
3293         root = info->extent_root;
3294         key.objectid = 0;
3295         key.offset = 0;
3296         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3297         path = btrfs_alloc_path();
3298         if (!path)
3299                 return -ENOMEM;
3300
3301         mutex_lock(&root->fs_info->alloc_mutex);
3302         while(1) {
3303                 ret = find_first_block_group(root, path, &key);
3304                 if (ret > 0) {
3305                         ret = 0;
3306                         goto error;
3307                 }
3308                 if (ret != 0)
3309                         goto error;
3310
3311                 leaf = path->nodes[0];
3312                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3313                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3314                 if (!cache) {
3315                         ret = -ENOMEM;
3316                         break;
3317                 }
3318
3319                 read_extent_buffer(leaf, &cache->item,
3320                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3321                                    sizeof(cache->item));
3322                 memcpy(&cache->key, &found_key, sizeof(found_key));
3323
3324                 key.objectid = found_key.objectid + found_key.offset;
3325                 btrfs_release_path(root, path);
3326                 cache->flags = btrfs_block_group_flags(&cache->item);
3327                 bit = 0;
3328                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3329                         bit = BLOCK_GROUP_DATA;
3330                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3331                         bit = BLOCK_GROUP_SYSTEM;
3332                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3333                         bit = BLOCK_GROUP_METADATA;
3334                 }
3335                 set_avail_alloc_bits(info, cache->flags);
3336
3337                 ret = update_space_info(info, cache->flags, found_key.offset,
3338                                         btrfs_block_group_used(&cache->item),
3339                                         &space_info);
3340                 BUG_ON(ret);
3341                 cache->space_info = space_info;
3342
3343                 /* use EXTENT_LOCKED to prevent merging */
3344                 set_extent_bits(block_group_cache, found_key.objectid,
3345                                 found_key.objectid + found_key.offset - 1,
3346                                 bit | EXTENT_LOCKED, GFP_NOFS);
3347                 set_state_private(block_group_cache, found_key.objectid,
3348                                   (unsigned long)cache);
3349
3350                 if (key.objectid >=
3351                     btrfs_super_total_bytes(&info->super_copy))
3352                         break;
3353         }
3354         ret = 0;
3355 error:
3356         btrfs_free_path(path);
3357         mutex_unlock(&root->fs_info->alloc_mutex);
3358         return ret;
3359 }
3360
3361 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3362                            struct btrfs_root *root, u64 bytes_used,
3363                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3364                            u64 size)
3365 {
3366         int ret;
3367         int bit = 0;
3368         struct btrfs_root *extent_root;
3369         struct btrfs_block_group_cache *cache;
3370         struct extent_io_tree *block_group_cache;
3371
3372         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
3373         extent_root = root->fs_info->extent_root;
3374         block_group_cache = &root->fs_info->block_group_cache;
3375
3376         cache = kzalloc(sizeof(*cache), GFP_NOFS);
3377         BUG_ON(!cache);
3378         cache->key.objectid = chunk_offset;
3379         cache->key.offset = size;
3380         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3381
3382         btrfs_set_block_group_used(&cache->item, bytes_used);
3383         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3384         cache->flags = type;
3385         btrfs_set_block_group_flags(&cache->item, type);
3386
3387         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3388                                 &cache->space_info);
3389         BUG_ON(ret);
3390
3391         bit = block_group_state_bits(type);
3392         set_extent_bits(block_group_cache, chunk_offset,
3393                         chunk_offset + size - 1,
3394                         bit | EXTENT_LOCKED, GFP_NOFS);
3395
3396         set_state_private(block_group_cache, chunk_offset,
3397                           (unsigned long)cache);
3398         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3399                                 sizeof(cache->item));
3400         BUG_ON(ret);
3401
3402         finish_current_insert(trans, extent_root);
3403         ret = del_pending_extents(trans, extent_root);
3404         BUG_ON(ret);
3405         set_avail_alloc_bits(extent_root->fs_info, type);
3406
3407         return 0;
3408 }