Btrfs: Fixes for 2.6.28-rc API changes
[safe/jmp/linux-2.6] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include "hash.h"
23 #include "crc32c.h"
24 #include "ctree.h"
25 #include "disk-io.h"
26 #include "print-tree.h"
27 #include "transaction.h"
28 #include "volumes.h"
29 #include "locking.h"
30 #include "ref-cache.h"
31 #include "compat.h"
32
33 #define PENDING_EXTENT_INSERT 0
34 #define PENDING_EXTENT_DELETE 1
35 #define PENDING_BACKREF_UPDATE 2
36
37 struct pending_extent_op {
38         int type;
39         u64 bytenr;
40         u64 num_bytes;
41         u64 parent;
42         u64 orig_parent;
43         u64 generation;
44         u64 orig_generation;
45         int level;
46         struct list_head list;
47         int del;
48 };
49
50 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
51                                  btrfs_root *extent_root, int all);
52 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
53                                btrfs_root *extent_root, int all);
54 static struct btrfs_block_group_cache *
55 __btrfs_find_block_group(struct btrfs_root *root,
56                          struct btrfs_block_group_cache *hint,
57                          u64 search_start, int data, int owner);
58 static int pin_down_bytes(struct btrfs_trans_handle *trans,
59                           struct btrfs_root *root,
60                           u64 bytenr, u64 num_bytes, int is_data);
61 static int update_block_group(struct btrfs_trans_handle *trans,
62                               struct btrfs_root *root,
63                               u64 bytenr, u64 num_bytes, int alloc,
64                               int mark_free);
65
66 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
67 {
68         return (cache->flags & bits) == bits;
69 }
70
71 /*
72  * this adds the block group to the fs_info rb tree for the block group
73  * cache
74  */
75 int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
76                                 struct btrfs_block_group_cache *block_group)
77 {
78         struct rb_node **p;
79         struct rb_node *parent = NULL;
80         struct btrfs_block_group_cache *cache;
81
82         spin_lock(&info->block_group_cache_lock);
83         p = &info->block_group_cache_tree.rb_node;
84
85         while (*p) {
86                 parent = *p;
87                 cache = rb_entry(parent, struct btrfs_block_group_cache,
88                                  cache_node);
89                 if (block_group->key.objectid < cache->key.objectid) {
90                         p = &(*p)->rb_left;
91                 } else if (block_group->key.objectid > cache->key.objectid) {
92                         p = &(*p)->rb_right;
93                 } else {
94                         spin_unlock(&info->block_group_cache_lock);
95                         return -EEXIST;
96                 }
97         }
98
99         rb_link_node(&block_group->cache_node, parent, p);
100         rb_insert_color(&block_group->cache_node,
101                         &info->block_group_cache_tree);
102         spin_unlock(&info->block_group_cache_lock);
103
104         return 0;
105 }
106
107 /*
108  * This will return the block group at or after bytenr if contains is 0, else
109  * it will return the block group that contains the bytenr
110  */
111 static struct btrfs_block_group_cache *
112 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
113                               int contains)
114 {
115         struct btrfs_block_group_cache *cache, *ret = NULL;
116         struct rb_node *n;
117         u64 end, start;
118
119         spin_lock(&info->block_group_cache_lock);
120         n = info->block_group_cache_tree.rb_node;
121
122         while (n) {
123                 cache = rb_entry(n, struct btrfs_block_group_cache,
124                                  cache_node);
125                 end = cache->key.objectid + cache->key.offset - 1;
126                 start = cache->key.objectid;
127
128                 if (bytenr < start) {
129                         if (!contains && (!ret || start < ret->key.objectid))
130                                 ret = cache;
131                         n = n->rb_left;
132                 } else if (bytenr > start) {
133                         if (contains && bytenr <= end) {
134                                 ret = cache;
135                                 break;
136                         }
137                         n = n->rb_right;
138                 } else {
139                         ret = cache;
140                         break;
141                 }
142         }
143         spin_unlock(&info->block_group_cache_lock);
144
145         return ret;
146 }
147
148 /*
149  * this is only called by cache_block_group, since we could have freed extents
150  * we need to check the pinned_extents for any extents that can't be used yet
151  * since their free space will be released as soon as the transaction commits.
152  */
153 static int add_new_free_space(struct btrfs_block_group_cache *block_group,
154                               struct btrfs_fs_info *info, u64 start, u64 end)
155 {
156         u64 extent_start, extent_end, size;
157         int ret;
158
159         mutex_lock(&info->pinned_mutex);
160         while (start < end) {
161                 ret = find_first_extent_bit(&info->pinned_extents, start,
162                                             &extent_start, &extent_end,
163                                             EXTENT_DIRTY);
164                 if (ret)
165                         break;
166
167                 if (extent_start == start) {
168                         start = extent_end + 1;
169                 } else if (extent_start > start && extent_start < end) {
170                         size = extent_start - start;
171                         ret = btrfs_add_free_space_lock(block_group, start,
172                                                         size);
173                         BUG_ON(ret);
174                         start = extent_end + 1;
175                 } else {
176                         break;
177                 }
178         }
179
180         if (start < end) {
181                 size = end - start;
182                 ret = btrfs_add_free_space_lock(block_group, start, size);
183                 BUG_ON(ret);
184         }
185         mutex_unlock(&info->pinned_mutex);
186
187         return 0;
188 }
189
190 static int cache_block_group(struct btrfs_root *root,
191                              struct btrfs_block_group_cache *block_group)
192 {
193         struct btrfs_path *path;
194         int ret = 0;
195         struct btrfs_key key;
196         struct extent_buffer *leaf;
197         int slot;
198         u64 last = 0;
199         u64 first_free;
200         int found = 0;
201
202         if (!block_group)
203                 return 0;
204
205         root = root->fs_info->extent_root;
206
207         if (block_group->cached)
208                 return 0;
209
210         path = btrfs_alloc_path();
211         if (!path)
212                 return -ENOMEM;
213
214         path->reada = 2;
215         /*
216          * we get into deadlocks with paths held by callers of this function.
217          * since the alloc_mutex is protecting things right now, just
218          * skip the locking here
219          */
220         path->skip_locking = 1;
221         first_free = max_t(u64, block_group->key.objectid,
222                            BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
223         key.objectid = block_group->key.objectid;
224         key.offset = 0;
225         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
226         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
227         if (ret < 0)
228                 goto err;
229         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
230         if (ret < 0)
231                 goto err;
232         if (ret == 0) {
233                 leaf = path->nodes[0];
234                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
235                 if (key.objectid + key.offset > first_free)
236                         first_free = key.objectid + key.offset;
237         }
238         while(1) {
239                 leaf = path->nodes[0];
240                 slot = path->slots[0];
241                 if (slot >= btrfs_header_nritems(leaf)) {
242                         ret = btrfs_next_leaf(root, path);
243                         if (ret < 0)
244                                 goto err;
245                         if (ret == 0)
246                                 continue;
247                         else
248                                 break;
249                 }
250                 btrfs_item_key_to_cpu(leaf, &key, slot);
251                 if (key.objectid < block_group->key.objectid)
252                         goto next;
253
254                 if (key.objectid >= block_group->key.objectid +
255                     block_group->key.offset)
256                         break;
257
258                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
259                         if (!found) {
260                                 last = first_free;
261                                 found = 1;
262                         }
263
264                         add_new_free_space(block_group, root->fs_info, last,
265                                            key.objectid);
266
267                         last = key.objectid + key.offset;
268                 }
269 next:
270                 path->slots[0]++;
271         }
272
273         if (!found)
274                 last = first_free;
275
276         add_new_free_space(block_group, root->fs_info, last,
277                            block_group->key.objectid +
278                            block_group->key.offset);
279
280         block_group->cached = 1;
281         ret = 0;
282 err:
283         btrfs_free_path(path);
284         return ret;
285 }
286
287 /*
288  * return the block group that starts at or after bytenr
289  */
290 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
291                                                        btrfs_fs_info *info,
292                                                          u64 bytenr)
293 {
294         struct btrfs_block_group_cache *cache;
295
296         cache = block_group_cache_tree_search(info, bytenr, 0);
297
298         return cache;
299 }
300
301 /*
302  * return the block group that contains teh given bytenr
303  */
304 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
305                                                          btrfs_fs_info *info,
306                                                          u64 bytenr)
307 {
308         struct btrfs_block_group_cache *cache;
309
310         cache = block_group_cache_tree_search(info, bytenr, 1);
311
312         return cache;
313 }
314
315 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
316                                                   u64 flags)
317 {
318         struct list_head *head = &info->space_info;
319         struct list_head *cur;
320         struct btrfs_space_info *found;
321         list_for_each(cur, head) {
322                 found = list_entry(cur, struct btrfs_space_info, list);
323                 if (found->flags == flags)
324                         return found;
325         }
326         return NULL;
327 }
328
329 static u64 div_factor(u64 num, int factor)
330 {
331         if (factor == 10)
332                 return num;
333         num *= factor;
334         do_div(num, 10);
335         return num;
336 }
337
338 static struct btrfs_block_group_cache *
339 __btrfs_find_block_group(struct btrfs_root *root,
340                          struct btrfs_block_group_cache *hint,
341                          u64 search_start, int data, int owner)
342 {
343         struct btrfs_block_group_cache *cache;
344         struct btrfs_block_group_cache *found_group = NULL;
345         struct btrfs_fs_info *info = root->fs_info;
346         u64 used;
347         u64 last = 0;
348         u64 free_check;
349         int full_search = 0;
350         int factor = 10;
351         int wrapped = 0;
352
353         if (data & BTRFS_BLOCK_GROUP_METADATA)
354                 factor = 9;
355
356         if (search_start) {
357                 struct btrfs_block_group_cache *shint;
358                 shint = btrfs_lookup_first_block_group(info, search_start);
359                 if (shint && block_group_bits(shint, data)) {
360                         spin_lock(&shint->lock);
361                         used = btrfs_block_group_used(&shint->item);
362                         if (used + shint->pinned + shint->reserved <
363                             div_factor(shint->key.offset, factor)) {
364                                 spin_unlock(&shint->lock);
365                                 return shint;
366                         }
367                         spin_unlock(&shint->lock);
368                 }
369         }
370         if (hint && block_group_bits(hint, data)) {
371                 spin_lock(&hint->lock);
372                 used = btrfs_block_group_used(&hint->item);
373                 if (used + hint->pinned + hint->reserved <
374                     div_factor(hint->key.offset, factor)) {
375                         spin_unlock(&hint->lock);
376                         return hint;
377                 }
378                 spin_unlock(&hint->lock);
379                 last = hint->key.objectid + hint->key.offset;
380         } else {
381                 if (hint)
382                         last = max(hint->key.objectid, search_start);
383                 else
384                         last = search_start;
385         }
386 again:
387         while (1) {
388                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
389                 if (!cache)
390                         break;
391
392                 spin_lock(&cache->lock);
393                 last = cache->key.objectid + cache->key.offset;
394                 used = btrfs_block_group_used(&cache->item);
395
396                 if (block_group_bits(cache, data)) {
397                         free_check = div_factor(cache->key.offset, factor);
398                         if (used + cache->pinned + cache->reserved <
399                             free_check) {
400                                 found_group = cache;
401                                 spin_unlock(&cache->lock);
402                                 goto found;
403                         }
404                 }
405                 spin_unlock(&cache->lock);
406                 cond_resched();
407         }
408         if (!wrapped) {
409                 last = search_start;
410                 wrapped = 1;
411                 goto again;
412         }
413         if (!full_search && factor < 10) {
414                 last = search_start;
415                 full_search = 1;
416                 factor = 10;
417                 goto again;
418         }
419 found:
420         return found_group;
421 }
422
423 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
424                                                  struct btrfs_block_group_cache
425                                                  *hint, u64 search_start,
426                                                  int data, int owner)
427 {
428
429         struct btrfs_block_group_cache *ret;
430         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
431         return ret;
432 }
433
434 /* simple helper to search for an existing extent at a given offset */
435 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
436 {
437         int ret;
438         struct btrfs_key key;
439         struct btrfs_path *path;
440
441         path = btrfs_alloc_path();
442         BUG_ON(!path);
443         key.objectid = start;
444         key.offset = len;
445         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
446         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
447                                 0, 0);
448         btrfs_free_path(path);
449         return ret;
450 }
451
452 /*
453  * Back reference rules.  Back refs have three main goals:
454  *
455  * 1) differentiate between all holders of references to an extent so that
456  *    when a reference is dropped we can make sure it was a valid reference
457  *    before freeing the extent.
458  *
459  * 2) Provide enough information to quickly find the holders of an extent
460  *    if we notice a given block is corrupted or bad.
461  *
462  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
463  *    maintenance.  This is actually the same as #2, but with a slightly
464  *    different use case.
465  *
466  * File extents can be referenced by:
467  *
468  * - multiple snapshots, subvolumes, or different generations in one subvol
469  * - different files inside a single subvolume
470  * - different offsets inside a file (bookend extents in file.c)
471  *
472  * The extent ref structure has fields for:
473  *
474  * - Objectid of the subvolume root
475  * - Generation number of the tree holding the reference
476  * - objectid of the file holding the reference
477  * - number of references holding by parent node (alway 1 for tree blocks)
478  *
479  * Btree leaf may hold multiple references to a file extent. In most cases,
480  * these references are from same file and the corresponding offsets inside
481  * the file are close together.
482  *
483  * When a file extent is allocated the fields are filled in:
484  *     (root_key.objectid, trans->transid, inode objectid, 1)
485  *
486  * When a leaf is cow'd new references are added for every file extent found
487  * in the leaf.  It looks similar to the create case, but trans->transid will
488  * be different when the block is cow'd.
489  *
490  *     (root_key.objectid, trans->transid, inode objectid,
491  *      number of references in the leaf)
492  *
493  * When a file extent is removed either during snapshot deletion or
494  * file truncation, we find the corresponding back reference and check
495  * the following fields:
496  *
497  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
498  *      inode objectid)
499  *
500  * Btree extents can be referenced by:
501  *
502  * - Different subvolumes
503  * - Different generations of the same subvolume
504  *
505  * When a tree block is created, back references are inserted:
506  *
507  * (root->root_key.objectid, trans->transid, level, 1)
508  *
509  * When a tree block is cow'd, new back references are added for all the
510  * blocks it points to. If the tree block isn't in reference counted root,
511  * the old back references are removed. These new back references are of
512  * the form (trans->transid will have increased since creation):
513  *
514  * (root->root_key.objectid, trans->transid, level, 1)
515  *
516  * When a backref is in deleting, the following fields are checked:
517  *
518  * if backref was for a tree root:
519  *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
520  * else
521  *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
522  *
523  * Back Reference Key composing:
524  *
525  * The key objectid corresponds to the first byte in the extent, the key
526  * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
527  * byte of parent extent. If a extent is tree root, the key offset is set
528  * to the key objectid.
529  */
530
531 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
532                                           struct btrfs_root *root,
533                                           struct btrfs_path *path,
534                                           u64 bytenr, u64 parent,
535                                           u64 ref_root, u64 ref_generation,
536                                           u64 owner_objectid, int del)
537 {
538         struct btrfs_key key;
539         struct btrfs_extent_ref *ref;
540         struct extent_buffer *leaf;
541         u64 ref_objectid;
542         int ret;
543
544         key.objectid = bytenr;
545         key.type = BTRFS_EXTENT_REF_KEY;
546         key.offset = parent;
547
548         ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
549         if (ret < 0)
550                 goto out;
551         if (ret > 0) {
552                 ret = -ENOENT;
553                 goto out;
554         }
555
556         leaf = path->nodes[0];
557         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
558         ref_objectid = btrfs_ref_objectid(leaf, ref);
559         if (btrfs_ref_root(leaf, ref) != ref_root ||
560             btrfs_ref_generation(leaf, ref) != ref_generation ||
561             (ref_objectid != owner_objectid &&
562              ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
563                 ret = -EIO;
564                 WARN_ON(1);
565                 goto out;
566         }
567         ret = 0;
568 out:
569         return ret;
570 }
571
572 /*
573  * updates all the backrefs that are pending on update_list for the
574  * extent_root
575  */
576 static int noinline update_backrefs(struct btrfs_trans_handle *trans,
577                                     struct btrfs_root *extent_root,
578                                     struct btrfs_path *path,
579                                     struct list_head *update_list)
580 {
581         struct btrfs_key key;
582         struct btrfs_extent_ref *ref;
583         struct btrfs_fs_info *info = extent_root->fs_info;
584         struct pending_extent_op *op;
585         struct extent_buffer *leaf;
586         int ret = 0;
587         struct list_head *cur = update_list->next;
588         u64 ref_objectid;
589         u64 ref_root = extent_root->root_key.objectid;
590
591         op = list_entry(cur, struct pending_extent_op, list);
592
593 search:
594         key.objectid = op->bytenr;
595         key.type = BTRFS_EXTENT_REF_KEY;
596         key.offset = op->orig_parent;
597
598         ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
599         BUG_ON(ret);
600
601         leaf = path->nodes[0];
602
603 loop:
604         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
605
606         ref_objectid = btrfs_ref_objectid(leaf, ref);
607
608         if (btrfs_ref_root(leaf, ref) != ref_root ||
609             btrfs_ref_generation(leaf, ref) != op->orig_generation ||
610             (ref_objectid != op->level &&
611              ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
612                 printk(KERN_ERR "couldn't find %Lu, parent %Lu, root %Lu, "
613                        "owner %u\n", op->bytenr, op->orig_parent,
614                        ref_root, op->level);
615                 btrfs_print_leaf(extent_root, leaf);
616                 BUG();
617         }
618
619         key.objectid = op->bytenr;
620         key.offset = op->parent;
621         key.type = BTRFS_EXTENT_REF_KEY;
622         ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
623         BUG_ON(ret);
624         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
625         btrfs_set_ref_generation(leaf, ref, op->generation);
626
627         cur = cur->next;
628
629         list_del_init(&op->list);
630         unlock_extent(&info->extent_ins, op->bytenr,
631                       op->bytenr + op->num_bytes - 1, GFP_NOFS);
632         kfree(op);
633
634         if (cur == update_list) {
635                 btrfs_mark_buffer_dirty(path->nodes[0]);
636                 btrfs_release_path(extent_root, path);
637                 goto out;
638         }
639
640         op = list_entry(cur, struct pending_extent_op, list);
641
642         path->slots[0]++;
643         while (path->slots[0] < btrfs_header_nritems(leaf)) {
644                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
645                 if (key.objectid == op->bytenr &&
646                     key.type == BTRFS_EXTENT_REF_KEY)
647                         goto loop;
648                 path->slots[0]++;
649         }
650
651         btrfs_mark_buffer_dirty(path->nodes[0]);
652         btrfs_release_path(extent_root, path);
653         goto search;
654
655 out:
656         return 0;
657 }
658
659 static int noinline insert_extents(struct btrfs_trans_handle *trans,
660                                    struct btrfs_root *extent_root,
661                                    struct btrfs_path *path,
662                                    struct list_head *insert_list, int nr)
663 {
664         struct btrfs_key *keys;
665         u32 *data_size;
666         struct pending_extent_op *op;
667         struct extent_buffer *leaf;
668         struct list_head *cur = insert_list->next;
669         struct btrfs_fs_info *info = extent_root->fs_info;
670         u64 ref_root = extent_root->root_key.objectid;
671         int i = 0, last = 0, ret;
672         int total = nr * 2;
673
674         if (!nr)
675                 return 0;
676
677         keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
678         if (!keys)
679                 return -ENOMEM;
680
681         data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
682         if (!data_size) {
683                 kfree(keys);
684                 return -ENOMEM;
685         }
686
687         list_for_each_entry(op, insert_list, list) {
688                 keys[i].objectid = op->bytenr;
689                 keys[i].offset = op->num_bytes;
690                 keys[i].type = BTRFS_EXTENT_ITEM_KEY;
691                 data_size[i] = sizeof(struct btrfs_extent_item);
692                 i++;
693
694                 keys[i].objectid = op->bytenr;
695                 keys[i].offset = op->parent;
696                 keys[i].type = BTRFS_EXTENT_REF_KEY;
697                 data_size[i] = sizeof(struct btrfs_extent_ref);
698                 i++;
699         }
700
701         op = list_entry(cur, struct pending_extent_op, list);
702         i = 0;
703         while (i < total) {
704                 int c;
705                 ret = btrfs_insert_some_items(trans, extent_root, path,
706                                               keys+i, data_size+i, total-i);
707                 BUG_ON(ret < 0);
708
709                 if (last && ret > 1)
710                         BUG();
711
712                 leaf = path->nodes[0];
713                 for (c = 0; c < ret; c++) {
714                         int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;
715
716                         /*
717                          * if the first item we inserted was a backref, then
718                          * the EXTENT_ITEM will be the odd c's, else it will
719                          * be the even c's
720                          */
721                         if ((ref_first && (c % 2)) ||
722                             (!ref_first && !(c % 2))) {
723                                 struct btrfs_extent_item *itm;
724
725                                 itm = btrfs_item_ptr(leaf, path->slots[0] + c,
726                                                      struct btrfs_extent_item);
727                                 btrfs_set_extent_refs(path->nodes[0], itm, 1);
728                                 op->del++;
729                         } else {
730                                 struct btrfs_extent_ref *ref;
731
732                                 ref = btrfs_item_ptr(leaf, path->slots[0] + c,
733                                                      struct btrfs_extent_ref);
734                                 btrfs_set_ref_root(leaf, ref, ref_root);
735                                 btrfs_set_ref_generation(leaf, ref,
736                                                          op->generation);
737                                 btrfs_set_ref_objectid(leaf, ref, op->level);
738                                 btrfs_set_ref_num_refs(leaf, ref, 1);
739                                 op->del++;
740                         }
741
742                         /*
743                          * using del to see when its ok to free up the
744                          * pending_extent_op.  In the case where we insert the
745                          * last item on the list in order to help do batching
746                          * we need to not free the extent op until we actually
747                          * insert the extent_item
748                          */
749                         if (op->del == 2) {
750                                 unlock_extent(&info->extent_ins, op->bytenr,
751                                               op->bytenr + op->num_bytes - 1,
752                                               GFP_NOFS);
753                                 cur = cur->next;
754                                 list_del_init(&op->list);
755                                 kfree(op);
756                                 if (cur != insert_list)
757                                         op = list_entry(cur,
758                                                 struct pending_extent_op,
759                                                 list);
760                         }
761                 }
762                 btrfs_mark_buffer_dirty(leaf);
763                 btrfs_release_path(extent_root, path);
764
765                 /*
766                  * Ok backref's and items usually go right next to eachother,
767                  * but if we could only insert 1 item that means that we
768                  * inserted on the end of a leaf, and we have no idea what may
769                  * be on the next leaf so we just play it safe.  In order to
770                  * try and help this case we insert the last thing on our
771                  * insert list so hopefully it will end up being the last
772                  * thing on the leaf and everything else will be before it,
773                  * which will let us insert a whole bunch of items at the same
774                  * time.
775                  */
776                 if (ret == 1 && !last && (i + ret < total)) {
777                         /*
778                          * last: where we will pick up the next time around
779                          * i: our current key to insert, will be total - 1
780                          * cur: the current op we are screwing with
781                          * op: duh
782                          */
783                         last = i + ret;
784                         i = total - 1;
785                         cur = insert_list->prev;
786                         op = list_entry(cur, struct pending_extent_op, list);
787                 } else if (last) {
788                         /*
789                          * ok we successfully inserted the last item on the
790                          * list, lets reset everything
791                          *
792                          * i: our current key to insert, so where we left off
793                          *    last time
794                          * last: done with this
795                          * cur: the op we are messing with
796                          * op: duh
797                          * total: since we inserted the last key, we need to
798                          *        decrement total so we dont overflow
799                          */
800                         i = last;
801                         last = 0;
802                         total--;
803                         if (i < total) {
804                                 cur = insert_list->next;
805                                 op = list_entry(cur, struct pending_extent_op,
806                                                 list);
807                         }
808                 } else {
809                         i += ret;
810                 }
811
812                 cond_resched();
813         }
814         ret = 0;
815         kfree(keys);
816         kfree(data_size);
817         return ret;
818 }
819
820 static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
821                                           struct btrfs_root *root,
822                                           struct btrfs_path *path,
823                                           u64 bytenr, u64 parent,
824                                           u64 ref_root, u64 ref_generation,
825                                           u64 owner_objectid)
826 {
827         struct btrfs_key key;
828         struct extent_buffer *leaf;
829         struct btrfs_extent_ref *ref;
830         u32 num_refs;
831         int ret;
832
833         key.objectid = bytenr;
834         key.type = BTRFS_EXTENT_REF_KEY;
835         key.offset = parent;
836
837         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
838         if (ret == 0) {
839                 leaf = path->nodes[0];
840                 ref = btrfs_item_ptr(leaf, path->slots[0],
841                                      struct btrfs_extent_ref);
842                 btrfs_set_ref_root(leaf, ref, ref_root);
843                 btrfs_set_ref_generation(leaf, ref, ref_generation);
844                 btrfs_set_ref_objectid(leaf, ref, owner_objectid);
845                 btrfs_set_ref_num_refs(leaf, ref, 1);
846         } else if (ret == -EEXIST) {
847                 u64 existing_owner;
848                 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
849                 leaf = path->nodes[0];
850                 ref = btrfs_item_ptr(leaf, path->slots[0],
851                                      struct btrfs_extent_ref);
852                 if (btrfs_ref_root(leaf, ref) != ref_root ||
853                     btrfs_ref_generation(leaf, ref) != ref_generation) {
854                         ret = -EIO;
855                         WARN_ON(1);
856                         goto out;
857                 }
858
859                 num_refs = btrfs_ref_num_refs(leaf, ref);
860                 BUG_ON(num_refs == 0);
861                 btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);
862
863                 existing_owner = btrfs_ref_objectid(leaf, ref);
864                 if (existing_owner != owner_objectid &&
865                     existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
866                         btrfs_set_ref_objectid(leaf, ref,
867                                         BTRFS_MULTIPLE_OBJECTIDS);
868                 }
869                 ret = 0;
870         } else {
871                 goto out;
872         }
873         btrfs_mark_buffer_dirty(path->nodes[0]);
874 out:
875         btrfs_release_path(root, path);
876         return ret;
877 }
878
879 static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
880                                           struct btrfs_root *root,
881                                           struct btrfs_path *path)
882 {
883         struct extent_buffer *leaf;
884         struct btrfs_extent_ref *ref;
885         u32 num_refs;
886         int ret = 0;
887
888         leaf = path->nodes[0];
889         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
890         num_refs = btrfs_ref_num_refs(leaf, ref);
891         BUG_ON(num_refs == 0);
892         num_refs -= 1;
893         if (num_refs == 0) {
894                 ret = btrfs_del_item(trans, root, path);
895         } else {
896                 btrfs_set_ref_num_refs(leaf, ref, num_refs);
897                 btrfs_mark_buffer_dirty(leaf);
898         }
899         btrfs_release_path(root, path);
900         return ret;
901 }
902
903 static void btrfs_issue_discard(struct block_device *bdev,
904                                 u64 start, u64 len)
905 {
906 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
907         blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
908 #else
909         blkdev_issue_discard(bdev, start >> 9, len >> 9);
910 #endif
911 }
912
913
914 static int noinline free_extents(struct btrfs_trans_handle *trans,
915                                  struct btrfs_root *extent_root,
916                                  struct list_head *del_list)
917 {
918         struct btrfs_fs_info *info = extent_root->fs_info;
919         struct btrfs_path *path;
920         struct btrfs_key key, found_key;
921         struct extent_buffer *leaf;
922         struct list_head *cur;
923         struct pending_extent_op *op;
924         struct btrfs_extent_item *ei;
925         int ret, num_to_del, extent_slot = 0, found_extent = 0;
926         u32 refs;
927         u64 bytes_freed = 0;
928
929         path = btrfs_alloc_path();
930         if (!path)
931                 return -ENOMEM;
932         path->reada = 1;
933
934 search:
935         /* search for the backref for the current ref we want to delete */
936         cur = del_list->next;
937         op = list_entry(cur, struct pending_extent_op, list);
938         ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
939                                     op->orig_parent,
940                                     extent_root->root_key.objectid,
941                                     op->orig_generation, op->level, 1);
942         if (ret) {
943                 printk("Unable to find backref byte nr %Lu root %Lu gen %Lu "
944                        "owner %u\n", op->bytenr,
945                        extent_root->root_key.objectid, op->orig_generation,
946                        op->level);
947                 btrfs_print_leaf(extent_root, path->nodes[0]);
948                 WARN_ON(1);
949                 goto out;
950         }
951
952         extent_slot = path->slots[0];
953         num_to_del = 1;
954         found_extent = 0;
955
956         /*
957          * if we aren't the first item on the leaf we can move back one and see
958          * if our ref is right next to our extent item
959          */
960         if (likely(extent_slot)) {
961                 extent_slot--;
962                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
963                                       extent_slot);
964                 if (found_key.objectid == op->bytenr &&
965                     found_key.type == BTRFS_EXTENT_ITEM_KEY &&
966                     found_key.offset == op->num_bytes) {
967                         num_to_del++;
968                         found_extent = 1;
969                 }
970         }
971
972         /*
973          * if we didn't find the extent we need to delete the backref and then
974          * search for the extent item key so we can update its ref count
975          */
976         if (!found_extent) {
977                 key.objectid = op->bytenr;
978                 key.type = BTRFS_EXTENT_ITEM_KEY;
979                 key.offset = op->num_bytes;
980
981                 ret = remove_extent_backref(trans, extent_root, path);
982                 BUG_ON(ret);
983                 btrfs_release_path(extent_root, path);
984                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
985                 BUG_ON(ret);
986                 extent_slot = path->slots[0];
987         }
988
989         /* this is where we update the ref count for the extent */
990         leaf = path->nodes[0];
991         ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
992         refs = btrfs_extent_refs(leaf, ei);
993         BUG_ON(refs == 0);
994         refs--;
995         btrfs_set_extent_refs(leaf, ei, refs);
996
997         btrfs_mark_buffer_dirty(leaf);
998
999         /*
1000          * This extent needs deleting.  The reason cur_slot is extent_slot +
1001          * num_to_del is because extent_slot points to the slot where the extent
1002          * is, and if the backref was not right next to the extent we will be
1003          * deleting at least 1 item, and will want to start searching at the
1004          * slot directly next to extent_slot.  However if we did find the
1005          * backref next to the extent item them we will be deleting at least 2
1006          * items and will want to start searching directly after the ref slot
1007          */
1008         if (!refs) {
1009                 struct list_head *pos, *n, *end;
1010                 int cur_slot = extent_slot+num_to_del;
1011                 u64 super_used;
1012                 u64 root_used;
1013
1014                 path->slots[0] = extent_slot;
1015                 bytes_freed = op->num_bytes;
1016
1017                 mutex_lock(&info->pinned_mutex);
1018                 ret = pin_down_bytes(trans, extent_root, op->bytenr,
1019                                      op->num_bytes, op->level >=
1020                                      BTRFS_FIRST_FREE_OBJECTID);
1021                 mutex_unlock(&info->pinned_mutex);
1022                 BUG_ON(ret < 0);
1023                 op->del = ret;
1024
1025                 /*
1026                  * we need to see if we can delete multiple things at once, so
1027                  * start looping through the list of extents we are wanting to
1028                  * delete and see if their extent/backref's are right next to
1029                  * eachother and the extents only have 1 ref
1030                  */
1031                 for (pos = cur->next; pos != del_list; pos = pos->next) {
1032                         struct pending_extent_op *tmp;
1033
1034                         tmp = list_entry(pos, struct pending_extent_op, list);
1035
1036                         /* we only want to delete extent+ref at this stage */
1037                         if (cur_slot >= btrfs_header_nritems(leaf) - 1)
1038                                 break;
1039
1040                         btrfs_item_key_to_cpu(leaf, &found_key, cur_slot);
1041                         if (found_key.objectid != tmp->bytenr ||
1042                             found_key.type != BTRFS_EXTENT_ITEM_KEY ||
1043                             found_key.offset != tmp->num_bytes)
1044                                 break;
1045
1046                         /* check to make sure this extent only has one ref */
1047                         ei = btrfs_item_ptr(leaf, cur_slot,
1048                                             struct btrfs_extent_item);
1049                         if (btrfs_extent_refs(leaf, ei) != 1)
1050                                 break;
1051
1052                         btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1);
1053                         if (found_key.objectid != tmp->bytenr ||
1054                             found_key.type != BTRFS_EXTENT_REF_KEY ||
1055                             found_key.offset != tmp->orig_parent)
1056                                 break;
1057
1058                         /*
1059                          * the ref is right next to the extent, we can set the
1060                          * ref count to 0 since we will delete them both now
1061                          */
1062                         btrfs_set_extent_refs(leaf, ei, 0);
1063
1064                         /* pin down the bytes for this extent */
1065                         mutex_lock(&info->pinned_mutex);
1066                         ret = pin_down_bytes(trans, extent_root, tmp->bytenr,
1067                                              tmp->num_bytes, tmp->level >=
1068                                              BTRFS_FIRST_FREE_OBJECTID);
1069                         mutex_unlock(&info->pinned_mutex);
1070                         BUG_ON(ret < 0);
1071
1072                         /*
1073                          * use the del field to tell if we need to go ahead and
1074                          * free up the extent when we delete the item or not.
1075                          */
1076                         tmp->del = ret;
1077                         bytes_freed += tmp->num_bytes;
1078
1079                         num_to_del += 2;
1080                         cur_slot += 2;
1081                 }
1082                 end = pos;
1083
1084                 /* update the free space counters */
1085                 spin_lock_irq(&info->delalloc_lock);
1086                 super_used = btrfs_super_bytes_used(&info->super_copy);
1087                 btrfs_set_super_bytes_used(&info->super_copy,
1088                                            super_used - bytes_freed);
1089                 spin_unlock_irq(&info->delalloc_lock);
1090
1091                 root_used = btrfs_root_used(&extent_root->root_item);
1092                 btrfs_set_root_used(&extent_root->root_item,
1093                                     root_used - bytes_freed);
1094
1095                 /* delete the items */
1096                 ret = btrfs_del_items(trans, extent_root, path,
1097                                       path->slots[0], num_to_del);
1098                 BUG_ON(ret);
1099
1100                 /*
1101                  * loop through the extents we deleted and do the cleanup work
1102                  * on them
1103                  */
1104                 for (pos = cur, n = pos->next; pos != end;
1105                      pos = n, n = pos->next) {
1106                         struct pending_extent_op *tmp;
1107 #ifdef BIO_RW_DISCARD
1108                         u64 map_length;
1109                         struct btrfs_multi_bio *multi = NULL;
1110 #endif
1111                         tmp = list_entry(pos, struct pending_extent_op, list);
1112
1113                         /*
1114                          * remember tmp->del tells us wether or not we pinned
1115                          * down the extent
1116                          */
1117                         ret = update_block_group(trans, extent_root,
1118                                                  tmp->bytenr, tmp->num_bytes, 0,
1119                                                  tmp->del);
1120                         BUG_ON(ret);
1121
1122 #ifdef BIO_RW_DISCARD
1123                         map_length = tmp->num_bytes;
1124                         ret = btrfs_map_block(&info->mapping_tree, READ,
1125                                               tmp->bytenr, &map_length, &multi,
1126                                               0);
1127                         if (!ret) {
1128                                 struct btrfs_bio_stripe *stripe;
1129                                 int i;
1130
1131                                 stripe = multi->stripes;
1132
1133                                 if (map_length > tmp->num_bytes)
1134                                         map_length = tmp->num_bytes;
1135
1136                                 for (i = 0; i < multi->num_stripes;
1137                                      i++, stripe++)
1138                                         btrfs_issue_discard(stripe->dev->bdev,
1139                                                             stripe->physical,
1140                                                             map_length);
1141                                 kfree(multi);
1142                         }
1143 #endif
1144                         list_del_init(&tmp->list);
1145                         unlock_extent(&info->extent_ins, tmp->bytenr,
1146                                       tmp->bytenr + tmp->num_bytes - 1,
1147                                       GFP_NOFS);
1148                         kfree(tmp);
1149                 }
1150         } else if (refs && found_extent) {
1151                 /*
1152                  * the ref and extent were right next to eachother, but the
1153                  * extent still has a ref, so just free the backref and keep
1154                  * going
1155                  */
1156                 ret = remove_extent_backref(trans, extent_root, path);
1157                 BUG_ON(ret);
1158
1159                 list_del_init(&op->list);
1160                 unlock_extent(&info->extent_ins, op->bytenr,
1161                               op->bytenr + op->num_bytes - 1, GFP_NOFS);
1162                 kfree(op);
1163         } else {
1164                 /*
1165                  * the extent has multiple refs and the backref we were looking
1166                  * for was not right next to it, so just unlock and go next,
1167                  * we're good to go
1168                  */
1169                 list_del_init(&op->list);
1170                 unlock_extent(&info->extent_ins, op->bytenr,
1171                               op->bytenr + op->num_bytes - 1, GFP_NOFS);
1172                 kfree(op);
1173         }
1174
1175         btrfs_release_path(extent_root, path);
1176         if (!list_empty(del_list))
1177                 goto search;
1178
1179 out:
1180         btrfs_free_path(path);
1181         return ret;
1182 }
1183
1184 static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1185                                      struct btrfs_root *root, u64 bytenr,
1186                                      u64 orig_parent, u64 parent,
1187                                      u64 orig_root, u64 ref_root,
1188                                      u64 orig_generation, u64 ref_generation,
1189                                      u64 owner_objectid)
1190 {
1191         int ret;
1192         struct btrfs_root *extent_root = root->fs_info->extent_root;
1193         struct btrfs_path *path;
1194
1195         if (root == root->fs_info->extent_root) {
1196                 struct pending_extent_op *extent_op;
1197                 u64 num_bytes;
1198
1199                 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
1200                 num_bytes = btrfs_level_size(root, (int)owner_objectid);
1201                 mutex_lock(&root->fs_info->extent_ins_mutex);
1202                 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
1203                                 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
1204                         u64 priv;
1205                         ret = get_state_private(&root->fs_info->extent_ins,
1206                                                 bytenr, &priv);
1207                         BUG_ON(ret);
1208                         extent_op = (struct pending_extent_op *)
1209                                                         (unsigned long)priv;
1210                         BUG_ON(extent_op->parent != orig_parent);
1211                         BUG_ON(extent_op->generation != orig_generation);
1212
1213                         extent_op->parent = parent;
1214                         extent_op->generation = ref_generation;
1215                 } else {
1216                         extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
1217                         BUG_ON(!extent_op);
1218
1219                         extent_op->type = PENDING_BACKREF_UPDATE;
1220                         extent_op->bytenr = bytenr;
1221                         extent_op->num_bytes = num_bytes;
1222                         extent_op->parent = parent;
1223                         extent_op->orig_parent = orig_parent;
1224                         extent_op->generation = ref_generation;
1225                         extent_op->orig_generation = orig_generation;
1226                         extent_op->level = (int)owner_objectid;
1227                         INIT_LIST_HEAD(&extent_op->list);
1228                         extent_op->del = 0;
1229
1230                         set_extent_bits(&root->fs_info->extent_ins,
1231                                         bytenr, bytenr + num_bytes - 1,
1232                                         EXTENT_WRITEBACK, GFP_NOFS);
1233                         set_state_private(&root->fs_info->extent_ins,
1234                                           bytenr, (unsigned long)extent_op);
1235                 }
1236                 mutex_unlock(&root->fs_info->extent_ins_mutex);
1237                 return 0;
1238         }
1239
1240         path = btrfs_alloc_path();
1241         if (!path)
1242                 return -ENOMEM;
1243         ret = lookup_extent_backref(trans, extent_root, path,
1244                                     bytenr, orig_parent, orig_root,
1245                                     orig_generation, owner_objectid, 1);
1246         if (ret)
1247                 goto out;
1248         ret = remove_extent_backref(trans, extent_root, path);
1249         if (ret)
1250                 goto out;
1251         ret = insert_extent_backref(trans, extent_root, path, bytenr,
1252                                     parent, ref_root, ref_generation,
1253                                     owner_objectid);
1254         BUG_ON(ret);
1255         finish_current_insert(trans, extent_root, 0);
1256         del_pending_extents(trans, extent_root, 0);
1257 out:
1258         btrfs_free_path(path);
1259         return ret;
1260 }
1261
1262 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1263                             struct btrfs_root *root, u64 bytenr,
1264                             u64 orig_parent, u64 parent,
1265                             u64 ref_root, u64 ref_generation,
1266                             u64 owner_objectid)
1267 {
1268         int ret;
1269         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1270             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1271                 return 0;
1272         ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
1273                                         parent, ref_root, ref_root,
1274                                         ref_generation, ref_generation,
1275                                         owner_objectid);
1276         return ret;
1277 }
1278
1279 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1280                                   struct btrfs_root *root, u64 bytenr,
1281                                   u64 orig_parent, u64 parent,
1282                                   u64 orig_root, u64 ref_root,
1283                                   u64 orig_generation, u64 ref_generation,
1284                                   u64 owner_objectid)
1285 {
1286         struct btrfs_path *path;
1287         int ret;
1288         struct btrfs_key key;
1289         struct extent_buffer *l;
1290         struct btrfs_extent_item *item;
1291         u32 refs;
1292
1293         path = btrfs_alloc_path();
1294         if (!path)
1295                 return -ENOMEM;
1296
1297         path->reada = 1;
1298         key.objectid = bytenr;
1299         key.type = BTRFS_EXTENT_ITEM_KEY;
1300         key.offset = (u64)-1;
1301
1302         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1303                                 0, 1);
1304         if (ret < 0)
1305                 return ret;
1306         BUG_ON(ret == 0 || path->slots[0] == 0);
1307
1308         path->slots[0]--;
1309         l = path->nodes[0];
1310
1311         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1312         if (key.objectid != bytenr) {
1313                 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
1314                 printk("wanted %Lu found %Lu\n", bytenr, key.objectid);
1315                 BUG();
1316         }
1317         BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
1318
1319         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1320         refs = btrfs_extent_refs(l, item);
1321         btrfs_set_extent_refs(l, item, refs + 1);
1322         btrfs_mark_buffer_dirty(path->nodes[0]);
1323
1324         btrfs_release_path(root->fs_info->extent_root, path);
1325
1326         path->reada = 1;
1327         ret = insert_extent_backref(trans, root->fs_info->extent_root,
1328                                     path, bytenr, parent,
1329                                     ref_root, ref_generation,
1330                                     owner_objectid);
1331         BUG_ON(ret);
1332         finish_current_insert(trans, root->fs_info->extent_root, 0);
1333         del_pending_extents(trans, root->fs_info->extent_root, 0);
1334
1335         btrfs_free_path(path);
1336         return 0;
1337 }
1338
1339 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1340                          struct btrfs_root *root,
1341                          u64 bytenr, u64 num_bytes, u64 parent,
1342                          u64 ref_root, u64 ref_generation,
1343                          u64 owner_objectid)
1344 {
1345         int ret;
1346         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1347             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1348                 return 0;
1349         ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
1350                                      0, ref_root, 0, ref_generation,
1351                                      owner_objectid);
1352         return ret;
1353 }
1354
1355 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1356                          struct btrfs_root *root)
1357 {
1358         finish_current_insert(trans, root->fs_info->extent_root, 1);
1359         del_pending_extents(trans, root->fs_info->extent_root, 1);
1360         return 0;
1361 }
1362
1363 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
1364                             struct btrfs_root *root, u64 bytenr,
1365                             u64 num_bytes, u32 *refs)
1366 {
1367         struct btrfs_path *path;
1368         int ret;
1369         struct btrfs_key key;
1370         struct extent_buffer *l;
1371         struct btrfs_extent_item *item;
1372
1373         WARN_ON(num_bytes < root->sectorsize);
1374         path = btrfs_alloc_path();
1375         path->reada = 1;
1376         key.objectid = bytenr;
1377         key.offset = num_bytes;
1378         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1379         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1380                                 0, 0);
1381         if (ret < 0)
1382                 goto out;
1383         if (ret != 0) {
1384                 btrfs_print_leaf(root, path->nodes[0]);
1385                 printk("failed to find block number %Lu\n", bytenr);
1386                 BUG();
1387         }
1388         l = path->nodes[0];
1389         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1390         *refs = btrfs_extent_refs(l, item);
1391 out:
1392         btrfs_free_path(path);
1393         return 0;
1394 }
1395
1396 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1397                           struct btrfs_root *root, u64 bytenr)
1398 {
1399         struct btrfs_root *extent_root = root->fs_info->extent_root;
1400         struct btrfs_path *path;
1401         struct extent_buffer *leaf;
1402         struct btrfs_extent_ref *ref_item;
1403         struct btrfs_key key;
1404         struct btrfs_key found_key;
1405         u64 ref_root;
1406         u64 last_snapshot;
1407         u32 nritems;
1408         int ret;
1409
1410         key.objectid = bytenr;
1411         key.offset = (u64)-1;
1412         key.type = BTRFS_EXTENT_ITEM_KEY;
1413
1414         path = btrfs_alloc_path();
1415         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1416         if (ret < 0)
1417                 goto out;
1418         BUG_ON(ret == 0);
1419
1420         ret = -ENOENT;
1421         if (path->slots[0] == 0)
1422                 goto out;
1423
1424         path->slots[0]--;
1425         leaf = path->nodes[0];
1426         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1427
1428         if (found_key.objectid != bytenr ||
1429             found_key.type != BTRFS_EXTENT_ITEM_KEY)
1430                 goto out;
1431
1432         last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1433         while (1) {
1434                 leaf = path->nodes[0];
1435                 nritems = btrfs_header_nritems(leaf);
1436                 if (path->slots[0] >= nritems) {
1437                         ret = btrfs_next_leaf(extent_root, path);
1438                         if (ret < 0)
1439                                 goto out;
1440                         if (ret == 0)
1441                                 continue;
1442                         break;
1443                 }
1444                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1445                 if (found_key.objectid != bytenr)
1446                         break;
1447
1448                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
1449                         path->slots[0]++;
1450                         continue;
1451                 }
1452
1453                 ref_item = btrfs_item_ptr(leaf, path->slots[0],
1454                                           struct btrfs_extent_ref);
1455                 ref_root = btrfs_ref_root(leaf, ref_item);
1456                 if (ref_root != root->root_key.objectid &&
1457                     ref_root != BTRFS_TREE_LOG_OBJECTID) {
1458                         ret = 1;
1459                         goto out;
1460                 }
1461                 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
1462                         ret = 1;
1463                         goto out;
1464                 }
1465
1466                 path->slots[0]++;
1467         }
1468         ret = 0;
1469 out:
1470         btrfs_free_path(path);
1471         return ret;
1472 }
1473
1474 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1475                     struct extent_buffer *buf, u32 nr_extents)
1476 {
1477         struct btrfs_key key;
1478         struct btrfs_file_extent_item *fi;
1479         u64 root_gen;
1480         u32 nritems;
1481         int i;
1482         int level;
1483         int ret = 0;
1484         int shared = 0;
1485
1486         if (!root->ref_cows)
1487                 return 0;
1488
1489         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1490                 shared = 0;
1491                 root_gen = root->root_key.offset;
1492         } else {
1493                 shared = 1;
1494                 root_gen = trans->transid - 1;
1495         }
1496
1497         level = btrfs_header_level(buf);
1498         nritems = btrfs_header_nritems(buf);
1499
1500         if (level == 0) {
1501                 struct btrfs_leaf_ref *ref;
1502                 struct btrfs_extent_info *info;
1503
1504                 ref = btrfs_alloc_leaf_ref(root, nr_extents);
1505                 if (!ref) {
1506                         ret = -ENOMEM;
1507                         goto out;
1508                 }
1509
1510                 ref->root_gen = root_gen;
1511                 ref->bytenr = buf->start;
1512                 ref->owner = btrfs_header_owner(buf);
1513                 ref->generation = btrfs_header_generation(buf);
1514                 ref->nritems = nr_extents;
1515                 info = ref->extents;
1516
1517                 for (i = 0; nr_extents > 0 && i < nritems; i++) {
1518                         u64 disk_bytenr;
1519                         btrfs_item_key_to_cpu(buf, &key, i);
1520                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1521                                 continue;
1522                         fi = btrfs_item_ptr(buf, i,
1523                                             struct btrfs_file_extent_item);
1524                         if (btrfs_file_extent_type(buf, fi) ==
1525                             BTRFS_FILE_EXTENT_INLINE)
1526                                 continue;
1527                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1528                         if (disk_bytenr == 0)
1529                                 continue;
1530
1531                         info->bytenr = disk_bytenr;
1532                         info->num_bytes =
1533                                 btrfs_file_extent_disk_num_bytes(buf, fi);
1534                         info->objectid = key.objectid;
1535                         info->offset = key.offset;
1536                         info++;
1537                 }
1538
1539                 ret = btrfs_add_leaf_ref(root, ref, shared);
1540                 if (ret == -EEXIST && shared) {
1541                         struct btrfs_leaf_ref *old;
1542                         old = btrfs_lookup_leaf_ref(root, ref->bytenr);
1543                         BUG_ON(!old);
1544                         btrfs_remove_leaf_ref(root, old);
1545                         btrfs_free_leaf_ref(root, old);
1546                         ret = btrfs_add_leaf_ref(root, ref, shared);
1547                 }
1548                 WARN_ON(ret);
1549                 btrfs_free_leaf_ref(root, ref);
1550         }
1551 out:
1552         return ret;
1553 }
1554
1555 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1556                   struct extent_buffer *orig_buf, struct extent_buffer *buf,
1557                   u32 *nr_extents)
1558 {
1559         u64 bytenr;
1560         u64 ref_root;
1561         u64 orig_root;
1562         u64 ref_generation;
1563         u64 orig_generation;
1564         u32 nritems;
1565         u32 nr_file_extents = 0;
1566         struct btrfs_key key;
1567         struct btrfs_file_extent_item *fi;
1568         int i;
1569         int level;
1570         int ret = 0;
1571         int faili = 0;
1572         int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1573                             u64, u64, u64, u64, u64, u64, u64, u64);
1574
1575         ref_root = btrfs_header_owner(buf);
1576         ref_generation = btrfs_header_generation(buf);
1577         orig_root = btrfs_header_owner(orig_buf);
1578         orig_generation = btrfs_header_generation(orig_buf);
1579
1580         nritems = btrfs_header_nritems(buf);
1581         level = btrfs_header_level(buf);
1582
1583         if (root->ref_cows) {
1584                 process_func = __btrfs_inc_extent_ref;
1585         } else {
1586                 if (level == 0 &&
1587                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1588                         goto out;
1589                 if (level != 0 &&
1590                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1591                         goto out;
1592                 process_func = __btrfs_update_extent_ref;
1593         }
1594
1595         for (i = 0; i < nritems; i++) {
1596                 cond_resched();
1597                 if (level == 0) {
1598                         btrfs_item_key_to_cpu(buf, &key, i);
1599                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1600                                 continue;
1601                         fi = btrfs_item_ptr(buf, i,
1602                                             struct btrfs_file_extent_item);
1603                         if (btrfs_file_extent_type(buf, fi) ==
1604                             BTRFS_FILE_EXTENT_INLINE)
1605                                 continue;
1606                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1607                         if (bytenr == 0)
1608                                 continue;
1609
1610                         nr_file_extents++;
1611
1612                         ret = process_func(trans, root, bytenr,
1613                                            orig_buf->start, buf->start,
1614                                            orig_root, ref_root,
1615                                            orig_generation, ref_generation,
1616                                            key.objectid);
1617
1618                         if (ret) {
1619                                 faili = i;
1620                                 WARN_ON(1);
1621                                 goto fail;
1622                         }
1623                 } else {
1624                         bytenr = btrfs_node_blockptr(buf, i);
1625                         ret = process_func(trans, root, bytenr,
1626                                            orig_buf->start, buf->start,
1627                                            orig_root, ref_root,
1628                                            orig_generation, ref_generation,
1629                                            level - 1);
1630                         if (ret) {
1631                                 faili = i;
1632                                 WARN_ON(1);
1633                                 goto fail;
1634                         }
1635                 }
1636         }
1637 out:
1638         if (nr_extents) {
1639                 if (level == 0)
1640                         *nr_extents = nr_file_extents;
1641                 else
1642                         *nr_extents = nritems;
1643         }
1644         return 0;
1645 fail:
1646         WARN_ON(1);
1647         return ret;
1648 }
1649
1650 int btrfs_update_ref(struct btrfs_trans_handle *trans,
1651                      struct btrfs_root *root, struct extent_buffer *orig_buf,
1652                      struct extent_buffer *buf, int start_slot, int nr)
1653
1654 {
1655         u64 bytenr;
1656         u64 ref_root;
1657         u64 orig_root;
1658         u64 ref_generation;
1659         u64 orig_generation;
1660         struct btrfs_key key;
1661         struct btrfs_file_extent_item *fi;
1662         int i;
1663         int ret;
1664         int slot;
1665         int level;
1666
1667         BUG_ON(start_slot < 0);
1668         BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1669
1670         ref_root = btrfs_header_owner(buf);
1671         ref_generation = btrfs_header_generation(buf);
1672         orig_root = btrfs_header_owner(orig_buf);
1673         orig_generation = btrfs_header_generation(orig_buf);
1674         level = btrfs_header_level(buf);
1675
1676         if (!root->ref_cows) {
1677                 if (level == 0 &&
1678                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1679                         return 0;
1680                 if (level != 0 &&
1681                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1682                         return 0;
1683         }
1684
1685         for (i = 0, slot = start_slot; i < nr; i++, slot++) {
1686                 cond_resched();
1687                 if (level == 0) {
1688                         btrfs_item_key_to_cpu(buf, &key, slot);
1689                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1690                                 continue;
1691                         fi = btrfs_item_ptr(buf, slot,
1692                                             struct btrfs_file_extent_item);
1693                         if (btrfs_file_extent_type(buf, fi) ==
1694                             BTRFS_FILE_EXTENT_INLINE)
1695                                 continue;
1696                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1697                         if (bytenr == 0)
1698                                 continue;
1699                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1700                                             orig_buf->start, buf->start,
1701                                             orig_root, ref_root,
1702                                             orig_generation, ref_generation,
1703                                             key.objectid);
1704                         if (ret)
1705                                 goto fail;
1706                 } else {
1707                         bytenr = btrfs_node_blockptr(buf, slot);
1708                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1709                                             orig_buf->start, buf->start,
1710                                             orig_root, ref_root,
1711                                             orig_generation, ref_generation,
1712                                             level - 1);
1713                         if (ret)
1714                                 goto fail;
1715                 }
1716         }
1717         return 0;
1718 fail:
1719         WARN_ON(1);
1720         return -1;
1721 }
1722
1723 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1724                                  struct btrfs_root *root,
1725                                  struct btrfs_path *path,
1726                                  struct btrfs_block_group_cache *cache)
1727 {
1728         int ret;
1729         int pending_ret;
1730         struct btrfs_root *extent_root = root->fs_info->extent_root;
1731         unsigned long bi;
1732         struct extent_buffer *leaf;
1733
1734         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1735         if (ret < 0)
1736                 goto fail;
1737         BUG_ON(ret);
1738
1739         leaf = path->nodes[0];
1740         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1741         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1742         btrfs_mark_buffer_dirty(leaf);
1743         btrfs_release_path(extent_root, path);
1744 fail:
1745         finish_current_insert(trans, extent_root, 0);
1746         pending_ret = del_pending_extents(trans, extent_root, 0);
1747         if (ret)
1748                 return ret;
1749         if (pending_ret)
1750                 return pending_ret;
1751         return 0;
1752
1753 }
1754
1755 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1756                                    struct btrfs_root *root)
1757 {
1758         struct btrfs_block_group_cache *cache, *entry;
1759         struct rb_node *n;
1760         int err = 0;
1761         int werr = 0;
1762         struct btrfs_path *path;
1763         u64 last = 0;
1764
1765         path = btrfs_alloc_path();
1766         if (!path)
1767                 return -ENOMEM;
1768
1769         while(1) {
1770                 cache = NULL;
1771                 spin_lock(&root->fs_info->block_group_cache_lock);
1772                 for (n = rb_first(&root->fs_info->block_group_cache_tree);
1773                      n; n = rb_next(n)) {
1774                         entry = rb_entry(n, struct btrfs_block_group_cache,
1775                                          cache_node);
1776                         if (entry->dirty) {
1777                                 cache = entry;
1778                                 break;
1779                         }
1780                 }
1781                 spin_unlock(&root->fs_info->block_group_cache_lock);
1782
1783                 if (!cache)
1784                         break;
1785
1786                 cache->dirty = 0;
1787                 last += cache->key.offset;
1788
1789                 err = write_one_cache_group(trans, root,
1790                                             path, cache);
1791                 /*
1792                  * if we fail to write the cache group, we want
1793                  * to keep it marked dirty in hopes that a later
1794                  * write will work
1795                  */
1796                 if (err) {
1797                         werr = err;
1798                         continue;
1799                 }
1800         }
1801         btrfs_free_path(path);
1802         return werr;
1803 }
1804
1805 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1806                              u64 total_bytes, u64 bytes_used,
1807                              struct btrfs_space_info **space_info)
1808 {
1809         struct btrfs_space_info *found;
1810
1811         found = __find_space_info(info, flags);
1812         if (found) {
1813                 spin_lock(&found->lock);
1814                 found->total_bytes += total_bytes;
1815                 found->bytes_used += bytes_used;
1816                 found->full = 0;
1817                 spin_unlock(&found->lock);
1818                 *space_info = found;
1819                 return 0;
1820         }
1821         found = kzalloc(sizeof(*found), GFP_NOFS);
1822         if (!found)
1823                 return -ENOMEM;
1824
1825         list_add(&found->list, &info->space_info);
1826         INIT_LIST_HEAD(&found->block_groups);
1827         init_rwsem(&found->groups_sem);
1828         spin_lock_init(&found->lock);
1829         found->flags = flags;
1830         found->total_bytes = total_bytes;
1831         found->bytes_used = bytes_used;
1832         found->bytes_pinned = 0;
1833         found->bytes_reserved = 0;
1834         found->bytes_readonly = 0;
1835         found->full = 0;
1836         found->force_alloc = 0;
1837         *space_info = found;
1838         return 0;
1839 }
1840
1841 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1842 {
1843         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1844                                    BTRFS_BLOCK_GROUP_RAID1 |
1845                                    BTRFS_BLOCK_GROUP_RAID10 |
1846                                    BTRFS_BLOCK_GROUP_DUP);
1847         if (extra_flags) {
1848                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1849                         fs_info->avail_data_alloc_bits |= extra_flags;
1850                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1851                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1852                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1853                         fs_info->avail_system_alloc_bits |= extra_flags;
1854         }
1855 }
1856
1857 static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
1858 {
1859         spin_lock(&cache->space_info->lock);
1860         spin_lock(&cache->lock);
1861         if (!cache->ro) {
1862                 cache->space_info->bytes_readonly += cache->key.offset -
1863                                         btrfs_block_group_used(&cache->item);
1864                 cache->ro = 1;
1865         }
1866         spin_unlock(&cache->lock);
1867         spin_unlock(&cache->space_info->lock);
1868 }
1869
1870 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1871 {
1872         u64 num_devices = root->fs_info->fs_devices->rw_devices;
1873
1874         if (num_devices == 1)
1875                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1876         if (num_devices < 4)
1877                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1878
1879         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1880             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1881                       BTRFS_BLOCK_GROUP_RAID10))) {
1882                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1883         }
1884
1885         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1886             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1887                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1888         }
1889
1890         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1891             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1892              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1893              (flags & BTRFS_BLOCK_GROUP_DUP)))
1894                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1895         return flags;
1896 }
1897
1898 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1899                           struct btrfs_root *extent_root, u64 alloc_bytes,
1900                           u64 flags, int force)
1901 {
1902         struct btrfs_space_info *space_info;
1903         u64 thresh;
1904         int ret = 0;
1905
1906         mutex_lock(&extent_root->fs_info->chunk_mutex);
1907
1908         flags = btrfs_reduce_alloc_profile(extent_root, flags);
1909
1910         space_info = __find_space_info(extent_root->fs_info, flags);
1911         if (!space_info) {
1912                 ret = update_space_info(extent_root->fs_info, flags,
1913                                         0, 0, &space_info);
1914                 BUG_ON(ret);
1915         }
1916         BUG_ON(!space_info);
1917
1918         spin_lock(&space_info->lock);
1919         if (space_info->force_alloc) {
1920                 force = 1;
1921                 space_info->force_alloc = 0;
1922         }
1923         if (space_info->full) {
1924                 spin_unlock(&space_info->lock);
1925                 goto out;
1926         }
1927
1928         thresh = space_info->total_bytes - space_info->bytes_readonly;
1929         thresh = div_factor(thresh, 6);
1930         if (!force &&
1931            (space_info->bytes_used + space_info->bytes_pinned +
1932             space_info->bytes_reserved + alloc_bytes) < thresh) {
1933                 spin_unlock(&space_info->lock);
1934                 goto out;
1935         }
1936         spin_unlock(&space_info->lock);
1937
1938         ret = btrfs_alloc_chunk(trans, extent_root, flags);
1939         if (ret) {
1940 printk("space info full %Lu\n", flags);
1941                 space_info->full = 1;
1942         }
1943 out:
1944         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1945         return ret;
1946 }
1947
1948 static int update_block_group(struct btrfs_trans_handle *trans,
1949                               struct btrfs_root *root,
1950                               u64 bytenr, u64 num_bytes, int alloc,
1951                               int mark_free)
1952 {
1953         struct btrfs_block_group_cache *cache;
1954         struct btrfs_fs_info *info = root->fs_info;
1955         u64 total = num_bytes;
1956         u64 old_val;
1957         u64 byte_in_group;
1958
1959         while(total) {
1960                 cache = btrfs_lookup_block_group(info, bytenr);
1961                 if (!cache)
1962                         return -1;
1963                 byte_in_group = bytenr - cache->key.objectid;
1964                 WARN_ON(byte_in_group > cache->key.offset);
1965
1966                 spin_lock(&cache->space_info->lock);
1967                 spin_lock(&cache->lock);
1968                 cache->dirty = 1;
1969                 old_val = btrfs_block_group_used(&cache->item);
1970                 num_bytes = min(total, cache->key.offset - byte_in_group);
1971                 if (alloc) {
1972                         old_val += num_bytes;
1973                         cache->space_info->bytes_used += num_bytes;
1974                         if (cache->ro) {
1975                                 cache->space_info->bytes_readonly -= num_bytes;
1976                                 WARN_ON(1);
1977                         }
1978                         btrfs_set_block_group_used(&cache->item, old_val);
1979                         spin_unlock(&cache->lock);
1980                         spin_unlock(&cache->space_info->lock);
1981                 } else {
1982                         old_val -= num_bytes;
1983                         cache->space_info->bytes_used -= num_bytes;
1984                         if (cache->ro)
1985                                 cache->space_info->bytes_readonly += num_bytes;
1986                         btrfs_set_block_group_used(&cache->item, old_val);
1987                         spin_unlock(&cache->lock);
1988                         spin_unlock(&cache->space_info->lock);
1989                         if (mark_free) {
1990                                 int ret;
1991                                 ret = btrfs_add_free_space(cache, bytenr,
1992                                                            num_bytes);
1993                                 if (ret)
1994                                         return -1;
1995                         }
1996                 }
1997                 total -= num_bytes;
1998                 bytenr += num_bytes;
1999         }
2000         return 0;
2001 }
2002
2003 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
2004 {
2005         struct btrfs_block_group_cache *cache;
2006
2007         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
2008         if (!cache)
2009                 return 0;
2010
2011         return cache->key.objectid;
2012 }
2013
2014 int btrfs_update_pinned_extents(struct btrfs_root *root,
2015                                 u64 bytenr, u64 num, int pin)
2016 {
2017         u64 len;
2018         struct btrfs_block_group_cache *cache;
2019         struct btrfs_fs_info *fs_info = root->fs_info;
2020
2021         WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex));
2022         if (pin) {
2023                 set_extent_dirty(&fs_info->pinned_extents,
2024                                 bytenr, bytenr + num - 1, GFP_NOFS);
2025         } else {
2026                 clear_extent_dirty(&fs_info->pinned_extents,
2027                                 bytenr, bytenr + num - 1, GFP_NOFS);
2028         }
2029         while (num > 0) {
2030                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2031                 BUG_ON(!cache);
2032                 len = min(num, cache->key.offset -
2033                           (bytenr - cache->key.objectid));
2034                 if (pin) {
2035                         spin_lock(&cache->space_info->lock);
2036                         spin_lock(&cache->lock);
2037                         cache->pinned += len;
2038                         cache->space_info->bytes_pinned += len;
2039                         spin_unlock(&cache->lock);
2040                         spin_unlock(&cache->space_info->lock);
2041                         fs_info->total_pinned += len;
2042                 } else {
2043                         spin_lock(&cache->space_info->lock);
2044                         spin_lock(&cache->lock);
2045                         cache->pinned -= len;
2046                         cache->space_info->bytes_pinned -= len;
2047                         spin_unlock(&cache->lock);
2048                         spin_unlock(&cache->space_info->lock);
2049                         fs_info->total_pinned -= len;
2050                         if (cache->cached)
2051                                 btrfs_add_free_space(cache, bytenr, len);
2052                 }
2053                 bytenr += len;
2054                 num -= len;
2055         }
2056         return 0;
2057 }
2058
2059 static int update_reserved_extents(struct btrfs_root *root,
2060                                    u64 bytenr, u64 num, int reserve)
2061 {
2062         u64 len;
2063         struct btrfs_block_group_cache *cache;
2064         struct btrfs_fs_info *fs_info = root->fs_info;
2065
2066         while (num > 0) {
2067                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2068                 BUG_ON(!cache);
2069                 len = min(num, cache->key.offset -
2070                           (bytenr - cache->key.objectid));
2071
2072                 spin_lock(&cache->space_info->lock);
2073                 spin_lock(&cache->lock);
2074                 if (reserve) {
2075                         cache->reserved += len;
2076                         cache->space_info->bytes_reserved += len;
2077                 } else {
2078                         cache->reserved -= len;
2079                         cache->space_info->bytes_reserved -= len;
2080                 }
2081                 spin_unlock(&cache->lock);
2082                 spin_unlock(&cache->space_info->lock);
2083                 bytenr += len;
2084                 num -= len;
2085         }
2086         return 0;
2087 }
2088
2089 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
2090 {
2091         u64 last = 0;
2092         u64 start;
2093         u64 end;
2094         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
2095         int ret;
2096
2097         mutex_lock(&root->fs_info->pinned_mutex);
2098         while(1) {
2099                 ret = find_first_extent_bit(pinned_extents, last,
2100                                             &start, &end, EXTENT_DIRTY);
2101                 if (ret)
2102                         break;
2103                 set_extent_dirty(copy, start, end, GFP_NOFS);
2104                 last = end + 1;
2105         }
2106         mutex_unlock(&root->fs_info->pinned_mutex);
2107         return 0;
2108 }
2109
2110 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2111                                struct btrfs_root *root,
2112                                struct extent_io_tree *unpin)
2113 {
2114         u64 start;
2115         u64 end;
2116         int ret;
2117
2118         mutex_lock(&root->fs_info->pinned_mutex);
2119         while(1) {
2120                 ret = find_first_extent_bit(unpin, 0, &start, &end,
2121                                             EXTENT_DIRTY);
2122                 if (ret)
2123                         break;
2124                 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
2125                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2126                 if (need_resched()) {
2127                         mutex_unlock(&root->fs_info->pinned_mutex);
2128                         cond_resched();
2129                         mutex_lock(&root->fs_info->pinned_mutex);
2130                 }
2131         }
2132         mutex_unlock(&root->fs_info->pinned_mutex);
2133         return 0;
2134 }
2135
2136 static int finish_current_insert(struct btrfs_trans_handle *trans,
2137                                  struct btrfs_root *extent_root, int all)
2138 {
2139         u64 start;
2140         u64 end;
2141         u64 priv;
2142         u64 search = 0;
2143         u64 skipped = 0;
2144         struct btrfs_fs_info *info = extent_root->fs_info;
2145         struct btrfs_path *path;
2146         struct pending_extent_op *extent_op, *tmp;
2147         struct list_head insert_list, update_list;
2148         int ret;
2149         int num_inserts = 0, max_inserts;
2150
2151         path = btrfs_alloc_path();
2152         INIT_LIST_HEAD(&insert_list);
2153         INIT_LIST_HEAD(&update_list);
2154
2155         max_inserts = extent_root->leafsize /
2156                 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) +
2157                  sizeof(struct btrfs_extent_ref) +
2158                  sizeof(struct btrfs_extent_item));
2159 again:
2160         mutex_lock(&info->extent_ins_mutex);
2161         while (1) {
2162                 ret = find_first_extent_bit(&info->extent_ins, search, &start,
2163                                             &end, EXTENT_WRITEBACK);
2164                 if (ret) {
2165                         if (skipped && all && !num_inserts) {
2166                                 skipped = 0;
2167                                 search = 0;
2168                                 continue;
2169                         }
2170                         mutex_unlock(&info->extent_ins_mutex);
2171                         break;
2172                 }
2173
2174                 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
2175                 if (!ret) {
2176                         skipped = 1;
2177                         search = end + 1;
2178                         if (need_resched()) {
2179                                 mutex_unlock(&info->extent_ins_mutex);
2180                                 cond_resched();
2181                                 mutex_lock(&info->extent_ins_mutex);
2182                         }
2183                         continue;
2184                 }
2185
2186                 ret = get_state_private(&info->extent_ins, start, &priv);
2187                 BUG_ON(ret);
2188                 extent_op = (struct pending_extent_op *)(unsigned long) priv;
2189
2190                 if (extent_op->type == PENDING_EXTENT_INSERT) {
2191                         num_inserts++;
2192                         list_add_tail(&extent_op->list, &insert_list);
2193                         search = end + 1;
2194                         if (num_inserts == max_inserts) {
2195                                 mutex_unlock(&info->extent_ins_mutex);
2196                                 break;
2197                         }
2198                 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
2199                         list_add_tail(&extent_op->list, &update_list);
2200                         search = end + 1;
2201                 } else {
2202                         BUG();
2203                 }
2204         }
2205
2206         /*
2207          * process the update list, clear the writeback bit for it, and if
2208          * somebody marked this thing for deletion then just unlock it and be
2209          * done, the free_extents will handle it
2210          */
2211         mutex_lock(&info->extent_ins_mutex);
2212         list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
2213                 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2214                                   extent_op->bytenr + extent_op->num_bytes - 1,
2215                                   EXTENT_WRITEBACK, GFP_NOFS);
2216                 if (extent_op->del) {
2217                         list_del_init(&extent_op->list);
2218                         unlock_extent(&info->extent_ins, extent_op->bytenr,
2219                                       extent_op->bytenr + extent_op->num_bytes
2220                                       - 1, GFP_NOFS);
2221                         kfree(extent_op);
2222                 }
2223         }
2224         mutex_unlock(&info->extent_ins_mutex);
2225
2226         /*
2227          * still have things left on the update list, go ahead an update
2228          * everything
2229          */
2230         if (!list_empty(&update_list)) {
2231                 ret = update_backrefs(trans, extent_root, path, &update_list);
2232                 BUG_ON(ret);
2233         }
2234
2235         /*
2236          * if no inserts need to be done, but we skipped some extents and we
2237          * need to make sure everything is cleaned then reset everything and
2238          * go back to the beginning
2239          */
2240         if (!num_inserts && all && skipped) {
2241                 search = 0;
2242                 skipped = 0;
2243                 INIT_LIST_HEAD(&update_list);
2244                 INIT_LIST_HEAD(&insert_list);
2245                 goto again;
2246         } else if (!num_inserts) {
2247                 goto out;
2248         }
2249
2250         /*
2251          * process the insert extents list.  Again if we are deleting this
2252          * extent, then just unlock it, pin down the bytes if need be, and be
2253          * done with it.  Saves us from having to actually insert the extent
2254          * into the tree and then subsequently come along and delete it
2255          */
2256         mutex_lock(&info->extent_ins_mutex);
2257         list_for_each_entry_safe(extent_op, tmp, &insert_list, list) {
2258                 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2259                                   extent_op->bytenr + extent_op->num_bytes - 1,
2260                                   EXTENT_WRITEBACK, GFP_NOFS);
2261                 if (extent_op->del) {
2262                         list_del_init(&extent_op->list);
2263                         unlock_extent(&info->extent_ins, extent_op->bytenr,
2264                                       extent_op->bytenr + extent_op->num_bytes
2265                                       - 1, GFP_NOFS);
2266
2267                         mutex_lock(&extent_root->fs_info->pinned_mutex);
2268                         ret = pin_down_bytes(trans, extent_root,
2269                                              extent_op->bytenr,
2270                                              extent_op->num_bytes, 0);
2271                         mutex_unlock(&extent_root->fs_info->pinned_mutex);
2272
2273                         ret = update_block_group(trans, extent_root,
2274                                                  extent_op->bytenr,
2275                                                  extent_op->num_bytes,
2276                                                  0, ret > 0);
2277                         BUG_ON(ret);
2278                         kfree(extent_op);
2279                         num_inserts--;
2280                 }
2281         }
2282         mutex_unlock(&info->extent_ins_mutex);
2283
2284         ret = insert_extents(trans, extent_root, path, &insert_list,
2285                              num_inserts);
2286         BUG_ON(ret);
2287
2288         /*
2289          * if we broke out of the loop in order to insert stuff because we hit
2290          * the maximum number of inserts at a time we can handle, then loop
2291          * back and pick up where we left off
2292          */
2293         if (num_inserts == max_inserts) {
2294                 INIT_LIST_HEAD(&insert_list);
2295                 INIT_LIST_HEAD(&update_list);
2296                 num_inserts = 0;
2297                 goto again;
2298         }
2299
2300         /*
2301          * again, if we need to make absolutely sure there are no more pending
2302          * extent operations left and we know that we skipped some, go back to
2303          * the beginning and do it all again
2304          */
2305         if (all && skipped) {
2306                 INIT_LIST_HEAD(&insert_list);
2307                 INIT_LIST_HEAD(&update_list);
2308                 search = 0;
2309                 skipped = 0;
2310                 num_inserts = 0;
2311                 goto again;
2312         }
2313 out:
2314         btrfs_free_path(path);
2315         return 0;
2316 }
2317
2318 static int pin_down_bytes(struct btrfs_trans_handle *trans,
2319                           struct btrfs_root *root,
2320                           u64 bytenr, u64 num_bytes, int is_data)
2321 {
2322         int err = 0;
2323         struct extent_buffer *buf;
2324
2325         if (is_data)
2326                 goto pinit;
2327
2328         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
2329         if (!buf)
2330                 goto pinit;
2331
2332         /* we can reuse a block if it hasn't been written
2333          * and it is from this transaction.  We can't
2334          * reuse anything from the tree log root because
2335          * it has tiny sub-transactions.
2336          */
2337         if (btrfs_buffer_uptodate(buf, 0) &&
2338             btrfs_try_tree_lock(buf)) {
2339                 u64 header_owner = btrfs_header_owner(buf);
2340                 u64 header_transid = btrfs_header_generation(buf);
2341                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2342                     header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2343                     header_transid == trans->transid &&
2344                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2345                         clean_tree_block(NULL, root, buf);
2346                         btrfs_tree_unlock(buf);
2347                         free_extent_buffer(buf);
2348                         return 1;
2349                 }
2350                 btrfs_tree_unlock(buf);
2351         }
2352         free_extent_buffer(buf);
2353 pinit:
2354         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2355
2356         BUG_ON(err < 0);
2357         return 0;
2358 }
2359
2360 /*
2361  * remove an extent from the root, returns 0 on success
2362  */
2363 static int __free_extent(struct btrfs_trans_handle *trans,
2364                          struct btrfs_root *root,
2365                          u64 bytenr, u64 num_bytes, u64 parent,
2366                          u64 root_objectid, u64 ref_generation,
2367                          u64 owner_objectid, int pin, int mark_free)
2368 {
2369         struct btrfs_path *path;
2370         struct btrfs_key key;
2371         struct btrfs_fs_info *info = root->fs_info;
2372         struct btrfs_root *extent_root = info->extent_root;
2373         struct extent_buffer *leaf;
2374         int ret;
2375         int extent_slot = 0;
2376         int found_extent = 0;
2377         int num_to_del = 1;
2378         struct btrfs_extent_item *ei;
2379         u32 refs;
2380
2381         key.objectid = bytenr;
2382         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
2383         key.offset = num_bytes;
2384         path = btrfs_alloc_path();
2385         if (!path)
2386                 return -ENOMEM;
2387
2388         path->reada = 1;
2389         ret = lookup_extent_backref(trans, extent_root, path,
2390                                     bytenr, parent, root_objectid,
2391                                     ref_generation, owner_objectid, 1);
2392         if (ret == 0) {
2393                 struct btrfs_key found_key;
2394                 extent_slot = path->slots[0];
2395                 while(extent_slot > 0) {
2396                         extent_slot--;
2397                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2398                                               extent_slot);
2399                         if (found_key.objectid != bytenr)
2400                                 break;
2401                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
2402                             found_key.offset == num_bytes) {
2403                                 found_extent = 1;
2404                                 break;
2405                         }
2406                         if (path->slots[0] - extent_slot > 5)
2407                                 break;
2408                 }
2409                 if (!found_extent) {
2410                         ret = remove_extent_backref(trans, extent_root, path);
2411                         BUG_ON(ret);
2412                         btrfs_release_path(extent_root, path);
2413                         ret = btrfs_search_slot(trans, extent_root,
2414                                                 &key, path, -1, 1);
2415                         if (ret) {
2416                                 printk(KERN_ERR "umm, got %d back from search"
2417                                        ", was looking for %Lu\n", ret,
2418                                        bytenr);
2419                                 btrfs_print_leaf(extent_root, path->nodes[0]);
2420                         }
2421                         BUG_ON(ret);
2422                         extent_slot = path->slots[0];
2423                 }
2424         } else {
2425                 btrfs_print_leaf(extent_root, path->nodes[0]);
2426                 WARN_ON(1);
2427                 printk("Unable to find ref byte nr %Lu root %Lu "
2428                        "gen %Lu owner %Lu\n", bytenr,
2429                        root_objectid, ref_generation, owner_objectid);
2430         }
2431
2432         leaf = path->nodes[0];
2433         ei = btrfs_item_ptr(leaf, extent_slot,
2434                             struct btrfs_extent_item);
2435         refs = btrfs_extent_refs(leaf, ei);
2436         BUG_ON(refs == 0);
2437         refs -= 1;
2438         btrfs_set_extent_refs(leaf, ei, refs);
2439
2440         btrfs_mark_buffer_dirty(leaf);
2441
2442         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
2443                 struct btrfs_extent_ref *ref;
2444                 ref = btrfs_item_ptr(leaf, path->slots[0],
2445                                      struct btrfs_extent_ref);
2446                 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
2447                 /* if the back ref and the extent are next to each other
2448                  * they get deleted below in one shot
2449                  */
2450                 path->slots[0] = extent_slot;
2451                 num_to_del = 2;
2452         } else if (found_extent) {
2453                 /* otherwise delete the extent back ref */
2454                 ret = remove_extent_backref(trans, extent_root, path);
2455                 BUG_ON(ret);
2456                 /* if refs are 0, we need to setup the path for deletion */
2457                 if (refs == 0) {
2458                         btrfs_release_path(extent_root, path);
2459                         ret = btrfs_search_slot(trans, extent_root, &key, path,
2460                                                 -1, 1);
2461                         BUG_ON(ret);
2462                 }
2463         }
2464
2465         if (refs == 0) {
2466                 u64 super_used;
2467                 u64 root_used;
2468 #ifdef BIO_RW_DISCARD
2469                 u64 map_length = num_bytes;
2470                 struct btrfs_multi_bio *multi = NULL;
2471 #endif
2472
2473                 if (pin) {
2474                         mutex_lock(&root->fs_info->pinned_mutex);
2475                         ret = pin_down_bytes(trans, root, bytenr, num_bytes,
2476                                 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
2477                         mutex_unlock(&root->fs_info->pinned_mutex);
2478                         if (ret > 0)
2479                                 mark_free = 1;
2480                         BUG_ON(ret < 0);
2481                 }
2482
2483                 /* block accounting for super block */
2484                 spin_lock_irq(&info->delalloc_lock);
2485                 super_used = btrfs_super_bytes_used(&info->super_copy);
2486                 btrfs_set_super_bytes_used(&info->super_copy,
2487                                            super_used - num_bytes);
2488                 spin_unlock_irq(&info->delalloc_lock);
2489
2490                 /* block accounting for root item */
2491                 root_used = btrfs_root_used(&root->root_item);
2492                 btrfs_set_root_used(&root->root_item,
2493                                            root_used - num_bytes);
2494                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
2495                                       num_to_del);
2496                 BUG_ON(ret);
2497                 btrfs_release_path(extent_root, path);
2498                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
2499                                          mark_free);
2500                 BUG_ON(ret);
2501
2502 #ifdef BIO_RW_DISCARD
2503                 /* Tell the block device(s) that the sectors can be discarded */
2504                 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
2505                                       bytenr, &map_length, &multi, 0);
2506                 if (!ret) {
2507                         struct btrfs_bio_stripe *stripe = multi->stripes;
2508                         int i;
2509
2510                         if (map_length > num_bytes)
2511                                 map_length = num_bytes;
2512
2513                         for (i = 0; i < multi->num_stripes; i++, stripe++) {
2514                                 btrfs_issue_discard(stripe->dev->bdev,
2515                                                     stripe->physical,
2516                                                      map_length);
2517                         }
2518                         kfree(multi);
2519                 }
2520 #endif
2521         }
2522         btrfs_free_path(path);
2523         finish_current_insert(trans, extent_root, 0);
2524         return ret;
2525 }
2526
2527 /*
2528  * find all the blocks marked as pending in the radix tree and remove
2529  * them from the extent map
2530  */
2531 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
2532                                btrfs_root *extent_root, int all)
2533 {
2534         int ret;
2535         int err = 0;
2536         u64 start;
2537         u64 end;
2538         u64 priv;
2539         u64 search = 0;
2540         int nr = 0, skipped = 0;
2541         struct extent_io_tree *pending_del;
2542         struct extent_io_tree *extent_ins;
2543         struct pending_extent_op *extent_op;
2544         struct btrfs_fs_info *info = extent_root->fs_info;
2545         struct list_head delete_list;
2546
2547         INIT_LIST_HEAD(&delete_list);
2548         extent_ins = &extent_root->fs_info->extent_ins;
2549         pending_del = &extent_root->fs_info->pending_del;
2550
2551 again:
2552         mutex_lock(&info->extent_ins_mutex);
2553         while(1) {
2554                 ret = find_first_extent_bit(pending_del, search, &start, &end,
2555                                             EXTENT_WRITEBACK);
2556                 if (ret) {
2557                         if (all && skipped && !nr) {
2558                                 search = 0;
2559                                 continue;
2560                         }
2561                         mutex_unlock(&info->extent_ins_mutex);
2562                         break;
2563                 }
2564
2565                 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
2566                 if (!ret) {
2567                         search = end+1;
2568                         skipped = 1;
2569
2570                         if (need_resched()) {
2571                                 mutex_unlock(&info->extent_ins_mutex);
2572                                 cond_resched();
2573                                 mutex_lock(&info->extent_ins_mutex);
2574                         }
2575
2576                         continue;
2577                 }
2578                 BUG_ON(ret < 0);
2579
2580                 ret = get_state_private(pending_del, start, &priv);
2581                 BUG_ON(ret);
2582                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2583
2584                 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK,
2585                                   GFP_NOFS);
2586                 if (!test_range_bit(extent_ins, start, end,
2587                                     EXTENT_WRITEBACK, 0)) {
2588                         list_add_tail(&extent_op->list, &delete_list);
2589                         nr++;
2590                 } else {
2591                         kfree(extent_op);
2592
2593                         ret = get_state_private(&info->extent_ins, start,
2594                                                 &priv);
2595                         BUG_ON(ret);
2596                         extent_op = (struct pending_extent_op *)
2597                                                 (unsigned long)priv;
2598
2599                         clear_extent_bits(&info->extent_ins, start, end,
2600                                           EXTENT_WRITEBACK, GFP_NOFS);
2601
2602                         if (extent_op->type == PENDING_BACKREF_UPDATE) {
2603                                 list_add_tail(&extent_op->list, &delete_list);
2604                                 search = end + 1;
2605                                 nr++;
2606                                 continue;
2607                         }
2608
2609                         mutex_lock(&extent_root->fs_info->pinned_mutex);
2610                         ret = pin_down_bytes(trans, extent_root, start,
2611                                              end + 1 - start, 0);
2612                         mutex_unlock(&extent_root->fs_info->pinned_mutex);
2613
2614                         ret = update_block_group(trans, extent_root, start,
2615                                                 end + 1 - start, 0, ret > 0);
2616
2617                         unlock_extent(extent_ins, start, end, GFP_NOFS);
2618                         BUG_ON(ret);
2619                         kfree(extent_op);
2620                 }
2621                 if (ret)
2622                         err = ret;
2623
2624                 search = end + 1;
2625
2626                 if (need_resched()) {
2627                         mutex_unlock(&info->extent_ins_mutex);
2628                         cond_resched();
2629                         mutex_lock(&info->extent_ins_mutex);
2630                 }
2631         }
2632
2633         if (nr) {
2634                 ret = free_extents(trans, extent_root, &delete_list);
2635                 BUG_ON(ret);
2636         }
2637
2638         if (all && skipped) {
2639                 INIT_LIST_HEAD(&delete_list);
2640                 search = 0;
2641                 nr = 0;
2642                 goto again;
2643         }
2644
2645         return err;
2646 }
2647
2648 /*
2649  * remove an extent from the root, returns 0 on success
2650  */
2651 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2652                                struct btrfs_root *root,
2653                                u64 bytenr, u64 num_bytes, u64 parent,
2654                                u64 root_objectid, u64 ref_generation,
2655                                u64 owner_objectid, int pin)
2656 {
2657         struct btrfs_root *extent_root = root->fs_info->extent_root;
2658         int pending_ret;
2659         int ret;
2660
2661         WARN_ON(num_bytes < root->sectorsize);
2662         if (root == extent_root) {
2663                 struct pending_extent_op *extent_op = NULL;
2664
2665                 mutex_lock(&root->fs_info->extent_ins_mutex);
2666                 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
2667                                 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
2668                         u64 priv;
2669                         ret = get_state_private(&root->fs_info->extent_ins,
2670                                                 bytenr, &priv);
2671                         BUG_ON(ret);
2672                         extent_op = (struct pending_extent_op *)
2673                                                 (unsigned long)priv;
2674
2675                         extent_op->del = 1;
2676                         if (extent_op->type == PENDING_EXTENT_INSERT) {
2677                                 mutex_unlock(&root->fs_info->extent_ins_mutex);
2678                                 return 0;
2679                         }
2680                 }
2681
2682                 if (extent_op) {
2683                         ref_generation = extent_op->orig_generation;
2684                         parent = extent_op->orig_parent;
2685                 }
2686
2687                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2688                 BUG_ON(!extent_op);
2689
2690                 extent_op->type = PENDING_EXTENT_DELETE;
2691                 extent_op->bytenr = bytenr;
2692                 extent_op->num_bytes = num_bytes;
2693                 extent_op->parent = parent;
2694                 extent_op->orig_parent = parent;
2695                 extent_op->generation = ref_generation;
2696                 extent_op->orig_generation = ref_generation;
2697                 extent_op->level = (int)owner_objectid;
2698                 INIT_LIST_HEAD(&extent_op->list);
2699                 extent_op->del = 0;
2700
2701                 set_extent_bits(&root->fs_info->pending_del,
2702                                 bytenr, bytenr + num_bytes - 1,
2703                                 EXTENT_WRITEBACK, GFP_NOFS);
2704                 set_state_private(&root->fs_info->pending_del,
2705                                   bytenr, (unsigned long)extent_op);
2706                 mutex_unlock(&root->fs_info->extent_ins_mutex);
2707                 return 0;
2708         }
2709         /* if metadata always pin */
2710         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2711                 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2712                         struct btrfs_block_group_cache *cache;
2713
2714                         /* btrfs_free_reserved_extent */
2715                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
2716                         BUG_ON(!cache);
2717                         btrfs_add_free_space(cache, bytenr, num_bytes);
2718                         update_reserved_extents(root, bytenr, num_bytes, 0);
2719                         return 0;
2720                 }
2721                 pin = 1;
2722         }
2723
2724         /* if data pin when any transaction has committed this */
2725         if (ref_generation != trans->transid)
2726                 pin = 1;
2727
2728         ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2729                             root_objectid, ref_generation,
2730                             owner_objectid, pin, pin == 0);
2731
2732         finish_current_insert(trans, root->fs_info->extent_root, 0);
2733         pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0);
2734         return ret ? ret : pending_ret;
2735 }
2736
2737 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2738                       struct btrfs_root *root,
2739                       u64 bytenr, u64 num_bytes, u64 parent,
2740                       u64 root_objectid, u64 ref_generation,
2741                       u64 owner_objectid, int pin)
2742 {
2743         int ret;
2744
2745         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
2746                                   root_objectid, ref_generation,
2747                                   owner_objectid, pin);
2748         return ret;
2749 }
2750
2751 static u64 stripe_align(struct btrfs_root *root, u64 val)
2752 {
2753         u64 mask = ((u64)root->stripesize - 1);
2754         u64 ret = (val + mask) & ~mask;
2755         return ret;
2756 }
2757
2758 /*
2759  * walks the btree of allocated extents and find a hole of a given size.
2760  * The key ins is changed to record the hole:
2761  * ins->objectid == block start
2762  * ins->flags = BTRFS_EXTENT_ITEM_KEY
2763  * ins->offset == number of blocks
2764  * Any available blocks before search_start are skipped.
2765  */
2766 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2767                                      struct btrfs_root *orig_root,
2768                                      u64 num_bytes, u64 empty_size,
2769                                      u64 search_start, u64 search_end,
2770                                      u64 hint_byte, struct btrfs_key *ins,
2771                                      u64 exclude_start, u64 exclude_nr,
2772                                      int data)
2773 {
2774         int ret = 0;
2775         struct btrfs_root * root = orig_root->fs_info->extent_root;
2776         u64 total_needed = num_bytes;
2777         u64 *last_ptr = NULL;
2778         u64 last_wanted = 0;
2779         struct btrfs_block_group_cache *block_group = NULL;
2780         int chunk_alloc_done = 0;
2781         int empty_cluster = 2 * 1024 * 1024;
2782         int allowed_chunk_alloc = 0;
2783         struct list_head *head = NULL, *cur = NULL;
2784         int loop = 0;
2785         int extra_loop = 0;
2786         struct btrfs_space_info *space_info;
2787
2788         WARN_ON(num_bytes < root->sectorsize);
2789         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2790         ins->objectid = 0;
2791         ins->offset = 0;
2792
2793         if (orig_root->ref_cows || empty_size)
2794                 allowed_chunk_alloc = 1;
2795
2796         if (data & BTRFS_BLOCK_GROUP_METADATA) {
2797                 last_ptr = &root->fs_info->last_alloc;
2798                 empty_cluster = 64 * 1024;
2799         }
2800
2801         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
2802                 last_ptr = &root->fs_info->last_data_alloc;
2803
2804         if (last_ptr) {
2805                 if (*last_ptr) {
2806                         hint_byte = *last_ptr;
2807                         last_wanted = *last_ptr;
2808                 } else
2809                         empty_size += empty_cluster;
2810         } else {
2811                 empty_cluster = 0;
2812         }
2813         search_start = max(search_start, first_logical_byte(root, 0));
2814         search_start = max(search_start, hint_byte);
2815
2816         if (last_wanted && search_start != last_wanted) {
2817                 last_wanted = 0;
2818                 empty_size += empty_cluster;
2819         }
2820
2821         total_needed += empty_size;
2822         block_group = btrfs_lookup_block_group(root->fs_info, search_start);
2823         if (!block_group)
2824                 block_group = btrfs_lookup_first_block_group(root->fs_info,
2825                                                              search_start);
2826         space_info = __find_space_info(root->fs_info, data);
2827
2828         down_read(&space_info->groups_sem);
2829         while (1) {
2830                 struct btrfs_free_space *free_space;
2831                 /*
2832                  * the only way this happens if our hint points to a block
2833                  * group thats not of the proper type, while looping this
2834                  * should never happen
2835                  */
2836                 if (empty_size)
2837                         extra_loop = 1;
2838
2839                 if (!block_group)
2840                         goto new_group_no_lock;
2841
2842                 mutex_lock(&block_group->alloc_mutex);
2843                 if (unlikely(!block_group_bits(block_group, data)))
2844                         goto new_group;
2845
2846                 ret = cache_block_group(root, block_group);
2847                 if (ret) {
2848                         mutex_unlock(&block_group->alloc_mutex);
2849                         break;
2850                 }
2851
2852                 if (block_group->ro)
2853                         goto new_group;
2854
2855                 free_space = btrfs_find_free_space(block_group, search_start,
2856                                                    total_needed);
2857                 if (free_space) {
2858                         u64 start = block_group->key.objectid;
2859                         u64 end = block_group->key.objectid +
2860                                 block_group->key.offset;
2861
2862                         search_start = stripe_align(root, free_space->offset);
2863
2864                         /* move on to the next group */
2865                         if (search_start + num_bytes >= search_end)
2866                                 goto new_group;
2867
2868                         /* move on to the next group */
2869                         if (search_start + num_bytes > end)
2870                                 goto new_group;
2871
2872                         if (last_wanted && search_start != last_wanted) {
2873                                 total_needed += empty_cluster;
2874                                 empty_size += empty_cluster;
2875                                 last_wanted = 0;
2876                                 /*
2877                                  * if search_start is still in this block group
2878                                  * then we just re-search this block group
2879                                  */
2880                                 if (search_start >= start &&
2881                                     search_start < end) {
2882                                         mutex_unlock(&block_group->alloc_mutex);
2883                                         continue;
2884                                 }
2885
2886                                 /* else we go to the next block group */
2887                                 goto new_group;
2888                         }
2889
2890                         if (exclude_nr > 0 &&
2891                             (search_start + num_bytes > exclude_start &&
2892                              search_start < exclude_start + exclude_nr)) {
2893                                 search_start = exclude_start + exclude_nr;
2894                                 /*
2895                                  * if search_start is still in this block group
2896                                  * then we just re-search this block group
2897                                  */
2898                                 if (search_start >= start &&
2899                                     search_start < end) {
2900                                         mutex_unlock(&block_group->alloc_mutex);
2901                                         last_wanted = 0;
2902                                         continue;
2903                                 }
2904
2905                                 /* else we go to the next block group */
2906                                 goto new_group;
2907                         }
2908
2909                         ins->objectid = search_start;
2910                         ins->offset = num_bytes;
2911
2912                         btrfs_remove_free_space_lock(block_group, search_start,
2913                                                      num_bytes);
2914                         /* we are all good, lets return */
2915                         mutex_unlock(&block_group->alloc_mutex);
2916                         break;
2917                 }
2918 new_group:
2919                 mutex_unlock(&block_group->alloc_mutex);
2920 new_group_no_lock:
2921                 /* don't try to compare new allocations against the
2922                  * last allocation any more
2923                  */
2924                 last_wanted = 0;
2925
2926                 /*
2927                  * Here's how this works.
2928                  * loop == 0: we were searching a block group via a hint
2929                  *              and didn't find anything, so we start at
2930                  *              the head of the block groups and keep searching
2931                  * loop == 1: we're searching through all of the block groups
2932                  *              if we hit the head again we have searched
2933                  *              all of the block groups for this space and we
2934                  *              need to try and allocate, if we cant error out.
2935                  * loop == 2: we allocated more space and are looping through
2936                  *              all of the block groups again.
2937                  */
2938                 if (loop == 0) {
2939                         head = &space_info->block_groups;
2940                         cur = head->next;
2941                         loop++;
2942                 } else if (loop == 1 && cur == head) {
2943                         int keep_going;
2944
2945                         /* at this point we give up on the empty_size
2946                          * allocations and just try to allocate the min
2947                          * space.
2948                          *
2949                          * The extra_loop field was set if an empty_size
2950                          * allocation was attempted above, and if this
2951                          * is try we need to try the loop again without
2952                          * the additional empty_size.
2953                          */
2954                         total_needed -= empty_size;
2955                         empty_size = 0;
2956                         keep_going = extra_loop;
2957                         loop++;
2958
2959                         if (allowed_chunk_alloc && !chunk_alloc_done) {
2960                                 up_read(&space_info->groups_sem);
2961                                 ret = do_chunk_alloc(trans, root, num_bytes +
2962                                                      2 * 1024 * 1024, data, 1);
2963                                 down_read(&space_info->groups_sem);
2964                                 if (ret < 0)
2965                                         goto loop_check;
2966                                 head = &space_info->block_groups;
2967                                 /*
2968                                  * we've allocated a new chunk, keep
2969                                  * trying
2970                                  */
2971                                 keep_going = 1;
2972                                 chunk_alloc_done = 1;
2973                         } else if (!allowed_chunk_alloc) {
2974                                 space_info->force_alloc = 1;
2975                         }
2976 loop_check:
2977                         if (keep_going) {
2978                                 cur = head->next;
2979                                 extra_loop = 0;
2980                         } else {
2981                                 break;
2982                         }
2983                 } else if (cur == head) {
2984                         break;
2985                 }
2986
2987                 block_group = list_entry(cur, struct btrfs_block_group_cache,
2988                                          list);
2989                 search_start = block_group->key.objectid;
2990                 cur = cur->next;
2991         }
2992
2993         /* we found what we needed */
2994         if (ins->objectid) {
2995                 if (!(data & BTRFS_BLOCK_GROUP_DATA))
2996                         trans->block_group = block_group;
2997
2998                 if (last_ptr)
2999                         *last_ptr = ins->objectid + ins->offset;
3000                 ret = 0;
3001         } else if (!ret) {
3002                 printk(KERN_ERR "we were searching for %Lu bytes, num_bytes %Lu,"
3003                        " loop %d, allowed_alloc %d\n", total_needed, num_bytes,
3004                        loop, allowed_chunk_alloc);
3005                 ret = -ENOSPC;
3006         }
3007
3008         up_read(&space_info->groups_sem);
3009         return ret;
3010 }
3011
3012 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
3013 {
3014         struct btrfs_block_group_cache *cache;
3015         struct list_head *l;
3016
3017         printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
3018                info->total_bytes - info->bytes_used - info->bytes_pinned -
3019                info->bytes_reserved, (info->full) ? "" : "not ");
3020
3021         down_read(&info->groups_sem);
3022         list_for_each(l, &info->block_groups) {
3023                 cache = list_entry(l, struct btrfs_block_group_cache, list);
3024                 spin_lock(&cache->lock);
3025                 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
3026                        "%Lu pinned %Lu reserved\n",
3027                        cache->key.objectid, cache->key.offset,
3028                        btrfs_block_group_used(&cache->item),
3029                        cache->pinned, cache->reserved);
3030                 btrfs_dump_free_space(cache, bytes);
3031                 spin_unlock(&cache->lock);
3032         }
3033         up_read(&info->groups_sem);
3034 }
3035
3036 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3037                                   struct btrfs_root *root,
3038                                   u64 num_bytes, u64 min_alloc_size,
3039                                   u64 empty_size, u64 hint_byte,
3040                                   u64 search_end, struct btrfs_key *ins,
3041                                   u64 data)
3042 {
3043         int ret;
3044         u64 search_start = 0;
3045         u64 alloc_profile;
3046         struct btrfs_fs_info *info = root->fs_info;
3047
3048         if (data) {
3049                 alloc_profile = info->avail_data_alloc_bits &
3050                                 info->data_alloc_profile;
3051                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
3052         } else if (root == root->fs_info->chunk_root) {
3053                 alloc_profile = info->avail_system_alloc_bits &
3054                                 info->system_alloc_profile;
3055                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
3056         } else {
3057                 alloc_profile = info->avail_metadata_alloc_bits &
3058                                 info->metadata_alloc_profile;
3059                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
3060         }
3061 again:
3062         data = btrfs_reduce_alloc_profile(root, data);
3063         /*
3064          * the only place that sets empty_size is btrfs_realloc_node, which
3065          * is not called recursively on allocations
3066          */
3067         if (empty_size || root->ref_cows) {
3068                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
3069                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3070                                      2 * 1024 * 1024,
3071                                      BTRFS_BLOCK_GROUP_METADATA |
3072                                      (info->metadata_alloc_profile &
3073                                       info->avail_metadata_alloc_bits), 0);
3074                 }
3075                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3076                                      num_bytes + 2 * 1024 * 1024, data, 0);
3077         }
3078
3079         WARN_ON(num_bytes < root->sectorsize);
3080         ret = find_free_extent(trans, root, num_bytes, empty_size,
3081                                search_start, search_end, hint_byte, ins,
3082                                trans->alloc_exclude_start,
3083                                trans->alloc_exclude_nr, data);
3084
3085         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
3086                 num_bytes = num_bytes >> 1;
3087                 num_bytes = num_bytes & ~(root->sectorsize - 1);
3088                 num_bytes = max(num_bytes, min_alloc_size);
3089                 do_chunk_alloc(trans, root->fs_info->extent_root,
3090                                num_bytes, data, 1);
3091                 goto again;
3092         }
3093         if (ret) {
3094                 struct btrfs_space_info *sinfo;
3095
3096                 sinfo = __find_space_info(root->fs_info, data);
3097                 printk("allocation failed flags %Lu, wanted %Lu\n",
3098                        data, num_bytes);
3099                 dump_space_info(sinfo, num_bytes);
3100                 BUG();
3101         }
3102
3103         return ret;
3104 }
3105
3106 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
3107 {
3108         struct btrfs_block_group_cache *cache;
3109
3110         cache = btrfs_lookup_block_group(root->fs_info, start);
3111         if (!cache) {
3112                 printk(KERN_ERR "Unable to find block group for %Lu\n", start);
3113                 return -ENOSPC;
3114         }
3115         btrfs_add_free_space(cache, start, len);
3116         update_reserved_extents(root, start, len, 0);
3117         return 0;
3118 }
3119
3120 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3121                                   struct btrfs_root *root,
3122                                   u64 num_bytes, u64 min_alloc_size,
3123                                   u64 empty_size, u64 hint_byte,
3124                                   u64 search_end, struct btrfs_key *ins,
3125                                   u64 data)
3126 {
3127         int ret;
3128         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
3129                                      empty_size, hint_byte, search_end, ins,
3130                                      data);
3131         update_reserved_extents(root, ins->objectid, ins->offset, 1);
3132         return ret;
3133 }
3134
3135 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3136                                          struct btrfs_root *root, u64 parent,
3137                                          u64 root_objectid, u64 ref_generation,
3138                                          u64 owner, struct btrfs_key *ins)
3139 {
3140         int ret;
3141         int pending_ret;
3142         u64 super_used;
3143         u64 root_used;
3144         u64 num_bytes = ins->offset;
3145         u32 sizes[2];
3146         struct btrfs_fs_info *info = root->fs_info;
3147         struct btrfs_root *extent_root = info->extent_root;
3148         struct btrfs_extent_item *extent_item;
3149         struct btrfs_extent_ref *ref;
3150         struct btrfs_path *path;
3151         struct btrfs_key keys[2];
3152
3153         if (parent == 0)
3154                 parent = ins->objectid;
3155
3156         /* block accounting for super block */
3157         spin_lock_irq(&info->delalloc_lock);
3158         super_used = btrfs_super_bytes_used(&info->super_copy);
3159         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
3160         spin_unlock_irq(&info->delalloc_lock);
3161
3162         /* block accounting for root item */
3163         root_used = btrfs_root_used(&root->root_item);
3164         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
3165
3166         if (root == extent_root) {
3167                 struct pending_extent_op *extent_op;
3168
3169                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
3170                 BUG_ON(!extent_op);
3171
3172                 extent_op->type = PENDING_EXTENT_INSERT;
3173                 extent_op->bytenr = ins->objectid;
3174                 extent_op->num_bytes = ins->offset;
3175                 extent_op->parent = parent;
3176                 extent_op->orig_parent = 0;
3177                 extent_op->generation = ref_generation;
3178                 extent_op->orig_generation = 0;
3179                 extent_op->level = (int)owner;
3180                 INIT_LIST_HEAD(&extent_op->list);
3181                 extent_op->del = 0;
3182
3183                 mutex_lock(&root->fs_info->extent_ins_mutex);
3184                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
3185                                 ins->objectid + ins->offset - 1,
3186                                 EXTENT_WRITEBACK, GFP_NOFS);
3187                 set_state_private(&root->fs_info->extent_ins,
3188                                   ins->objectid, (unsigned long)extent_op);
3189                 mutex_unlock(&root->fs_info->extent_ins_mutex);
3190                 goto update_block;
3191         }
3192
3193         memcpy(&keys[0], ins, sizeof(*ins));
3194         keys[1].objectid = ins->objectid;
3195         keys[1].type = BTRFS_EXTENT_REF_KEY;
3196         keys[1].offset = parent;
3197         sizes[0] = sizeof(*extent_item);
3198         sizes[1] = sizeof(*ref);
3199
3200         path = btrfs_alloc_path();
3201         BUG_ON(!path);
3202
3203         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
3204                                        sizes, 2);
3205         BUG_ON(ret);
3206
3207         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3208                                      struct btrfs_extent_item);
3209         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
3210         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
3211                              struct btrfs_extent_ref);
3212
3213         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
3214         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
3215         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
3216         btrfs_set_ref_num_refs(path->nodes[0], ref, 1);
3217
3218         btrfs_mark_buffer_dirty(path->nodes[0]);
3219
3220         trans->alloc_exclude_start = 0;
3221         trans->alloc_exclude_nr = 0;
3222         btrfs_free_path(path);
3223         finish_current_insert(trans, extent_root, 0);
3224         pending_ret = del_pending_extents(trans, extent_root, 0);
3225
3226         if (ret)
3227                 goto out;
3228         if (pending_ret) {
3229                 ret = pending_ret;
3230                 goto out;
3231         }
3232
3233 update_block:
3234         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
3235         if (ret) {
3236                 printk("update block group failed for %Lu %Lu\n",
3237                        ins->objectid, ins->offset);
3238                 BUG();
3239         }
3240 out:
3241         return ret;
3242 }
3243
3244 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3245                                 struct btrfs_root *root, u64 parent,
3246                                 u64 root_objectid, u64 ref_generation,
3247                                 u64 owner, struct btrfs_key *ins)
3248 {
3249         int ret;
3250
3251         if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
3252                 return 0;
3253         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
3254                                             ref_generation, owner, ins);
3255         update_reserved_extents(root, ins->objectid, ins->offset, 0);
3256         return ret;
3257 }
3258
3259 /*
3260  * this is used by the tree logging recovery code.  It records that
3261  * an extent has been allocated and makes sure to clear the free
3262  * space cache bits as well
3263  */
3264 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3265                                 struct btrfs_root *root, u64 parent,
3266                                 u64 root_objectid, u64 ref_generation,
3267                                 u64 owner, struct btrfs_key *ins)
3268 {
3269         int ret;
3270         struct btrfs_block_group_cache *block_group;
3271
3272         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
3273         mutex_lock(&block_group->alloc_mutex);
3274         cache_block_group(root, block_group);
3275
3276         ret = btrfs_remove_free_space_lock(block_group, ins->objectid,
3277                                            ins->offset);
3278         mutex_unlock(&block_group->alloc_mutex);
3279         BUG_ON(ret);
3280         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
3281                                             ref_generation, owner, ins);
3282         return ret;
3283 }
3284
3285 /*
3286  * finds a free extent and does all the dirty work required for allocation
3287  * returns the key for the extent through ins, and a tree buffer for
3288  * the first block of the extent through buf.
3289  *
3290  * returns 0 if everything worked, non-zero otherwise.
3291  */
3292 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
3293                        struct btrfs_root *root,
3294                        u64 num_bytes, u64 parent, u64 min_alloc_size,
3295                        u64 root_objectid, u64 ref_generation,
3296                        u64 owner_objectid, u64 empty_size, u64 hint_byte,
3297                        u64 search_end, struct btrfs_key *ins, u64 data)
3298 {
3299         int ret;
3300
3301         ret = __btrfs_reserve_extent(trans, root, num_bytes,
3302                                      min_alloc_size, empty_size, hint_byte,
3303                                      search_end, ins, data);
3304         BUG_ON(ret);
3305         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
3306                 ret = __btrfs_alloc_reserved_extent(trans, root, parent,
3307                                         root_objectid, ref_generation,
3308                                         owner_objectid, ins);
3309                 BUG_ON(ret);
3310
3311         } else {
3312                 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3313         }
3314         return ret;
3315 }
3316
3317 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
3318                                             struct btrfs_root *root,
3319                                             u64 bytenr, u32 blocksize)
3320 {
3321         struct extent_buffer *buf;
3322
3323         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
3324         if (!buf)
3325                 return ERR_PTR(-ENOMEM);
3326         btrfs_set_header_generation(buf, trans->transid);
3327         btrfs_tree_lock(buf);
3328         clean_tree_block(trans, root, buf);
3329         btrfs_set_buffer_uptodate(buf);
3330         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
3331                 set_extent_dirty(&root->dirty_log_pages, buf->start,
3332                          buf->start + buf->len - 1, GFP_NOFS);
3333         } else {
3334                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
3335                          buf->start + buf->len - 1, GFP_NOFS);
3336         }
3337         trans->blocks_used++;
3338         return buf;
3339 }
3340
3341 /*
3342  * helper function to allocate a block for a given tree
3343  * returns the tree buffer or NULL.
3344  */
3345 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3346                                              struct btrfs_root *root,
3347                                              u32 blocksize, u64 parent,
3348                                              u64 root_objectid,
3349                                              u64 ref_generation,
3350                                              int level,
3351                                              u64 hint,
3352                                              u64 empty_size)
3353 {
3354         struct btrfs_key ins;
3355         int ret;
3356         struct extent_buffer *buf;
3357
3358         ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
3359                                  root_objectid, ref_generation, level,
3360                                  empty_size, hint, (u64)-1, &ins, 0);
3361         if (ret) {
3362                 BUG_ON(ret > 0);
3363                 return ERR_PTR(ret);
3364         }
3365
3366         buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
3367         return buf;
3368 }
3369
3370 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3371                         struct btrfs_root *root, struct extent_buffer *leaf)
3372 {
3373         u64 leaf_owner;
3374         u64 leaf_generation;
3375         struct btrfs_key key;
3376         struct btrfs_file_extent_item *fi;
3377         int i;
3378         int nritems;
3379         int ret;
3380
3381         BUG_ON(!btrfs_is_leaf(leaf));
3382         nritems = btrfs_header_nritems(leaf);
3383         leaf_owner = btrfs_header_owner(leaf);
3384         leaf_generation = btrfs_header_generation(leaf);
3385
3386         for (i = 0; i < nritems; i++) {
3387                 u64 disk_bytenr;
3388                 cond_resched();
3389
3390                 btrfs_item_key_to_cpu(leaf, &key, i);
3391                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3392                         continue;
3393                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3394                 if (btrfs_file_extent_type(leaf, fi) ==
3395                     BTRFS_FILE_EXTENT_INLINE)
3396                         continue;
3397                 /*
3398                  * FIXME make sure to insert a trans record that
3399                  * repeats the snapshot del on crash
3400                  */
3401                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
3402                 if (disk_bytenr == 0)
3403                         continue;
3404
3405                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
3406                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
3407                                 leaf->start, leaf_owner, leaf_generation,
3408                                 key.objectid, 0);
3409                 BUG_ON(ret);
3410
3411                 atomic_inc(&root->fs_info->throttle_gen);
3412                 wake_up(&root->fs_info->transaction_throttle);
3413                 cond_resched();
3414         }
3415         return 0;
3416 }
3417
3418 static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3419                                         struct btrfs_root *root,
3420                                         struct btrfs_leaf_ref *ref)
3421 {
3422         int i;
3423         int ret;
3424         struct btrfs_extent_info *info = ref->extents;
3425
3426         for (i = 0; i < ref->nritems; i++) {
3427                 ret = __btrfs_free_extent(trans, root, info->bytenr,
3428                                           info->num_bytes, ref->bytenr,
3429                                           ref->owner, ref->generation,
3430                                           info->objectid, 0);
3431
3432                 atomic_inc(&root->fs_info->throttle_gen);
3433                 wake_up(&root->fs_info->transaction_throttle);
3434                 cond_resched();
3435
3436                 BUG_ON(ret);
3437                 info++;
3438         }
3439
3440         return 0;
3441 }
3442
3443 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
3444                               u32 *refs)
3445 {
3446         int ret;
3447
3448         ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
3449         BUG_ON(ret);
3450
3451 #if 0 // some debugging code in case we see problems here
3452         /* if the refs count is one, it won't get increased again.  But
3453          * if the ref count is > 1, someone may be decreasing it at
3454          * the same time we are.
3455          */
3456         if (*refs != 1) {
3457                 struct extent_buffer *eb = NULL;
3458                 eb = btrfs_find_create_tree_block(root, start, len);
3459                 if (eb)
3460                         btrfs_tree_lock(eb);
3461
3462                 mutex_lock(&root->fs_info->alloc_mutex);
3463                 ret = lookup_extent_ref(NULL, root, start, len, refs);
3464                 BUG_ON(ret);
3465                 mutex_unlock(&root->fs_info->alloc_mutex);
3466
3467                 if (eb) {
3468                         btrfs_tree_unlock(eb);
3469                         free_extent_buffer(eb);
3470                 }
3471                 if (*refs == 1) {
3472                         printk("block %llu went down to one during drop_snap\n",
3473                                (unsigned long long)start);
3474                 }
3475
3476         }
3477 #endif
3478
3479         cond_resched();
3480         return ret;
3481 }
3482
3483 /*
3484  * helper function for drop_snapshot, this walks down the tree dropping ref
3485  * counts as it goes.
3486  */
3487 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
3488                                    struct btrfs_root *root,
3489                                    struct btrfs_path *path, int *level)
3490 {
3491         u64 root_owner;
3492         u64 root_gen;
3493         u64 bytenr;
3494         u64 ptr_gen;
3495         struct extent_buffer *next;
3496         struct extent_buffer *cur;
3497         struct extent_buffer *parent;
3498         struct btrfs_leaf_ref *ref;
3499         u32 blocksize;
3500         int ret;
3501         u32 refs;
3502
3503         WARN_ON(*level < 0);
3504         WARN_ON(*level >= BTRFS_MAX_LEVEL);
3505         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
3506                                 path->nodes[*level]->len, &refs);
3507         BUG_ON(ret);
3508         if (refs > 1)
3509                 goto out;
3510
3511         /*
3512          * walk down to the last node level and free all the leaves
3513          */
3514         while(*level >= 0) {
3515                 WARN_ON(*level < 0);
3516                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
3517                 cur = path->nodes[*level];
3518
3519                 if (btrfs_header_level(cur) != *level)
3520                         WARN_ON(1);
3521
3522                 if (path->slots[*level] >=
3523                     btrfs_header_nritems(cur))
3524                         break;
3525                 if (*level == 0) {
3526                         ret = btrfs_drop_leaf_ref(trans, root, cur);
3527                         BUG_ON(ret);
3528                         break;
3529                 }
3530                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
3531                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
3532                 blocksize = btrfs_level_size(root, *level - 1);
3533
3534                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
3535                 BUG_ON(ret);
3536                 if (refs != 1) {
3537                         parent = path->nodes[*level];
3538                         root_owner = btrfs_header_owner(parent);
3539                         root_gen = btrfs_header_generation(parent);
3540                         path->slots[*level]++;
3541
3542                         ret = __btrfs_free_extent(trans, root, bytenr,
3543                                                 blocksize, parent->start,
3544                                                 root_owner, root_gen,
3545                                                 *level - 1, 1);
3546                         BUG_ON(ret);
3547
3548                         atomic_inc(&root->fs_info->throttle_gen);
3549                         wake_up(&root->fs_info->transaction_throttle);
3550                         cond_resched();
3551
3552                         continue;
3553                 }
3554                 /*
3555                  * at this point, we have a single ref, and since the
3556                  * only place referencing this extent is a dead root
3557                  * the reference count should never go higher.
3558                  * So, we don't need to check it again
3559                  */
3560                 if (*level == 1) {
3561                         ref = btrfs_lookup_leaf_ref(root, bytenr);
3562                         if (ref && ref->generation != ptr_gen) {
3563                                 btrfs_free_leaf_ref(root, ref);
3564                                 ref = NULL;
3565                         }
3566                         if (ref) {
3567                                 ret = cache_drop_leaf_ref(trans, root, ref);
3568                                 BUG_ON(ret);
3569                                 btrfs_remove_leaf_ref(root, ref);
3570                                 btrfs_free_leaf_ref(root, ref);
3571                                 *level = 0;
3572                                 break;
3573                         }
3574                         if (printk_ratelimit()) {
3575                                 printk("leaf ref miss for bytenr %llu\n",
3576                                        (unsigned long long)bytenr);
3577                         }
3578                 }
3579                 next = btrfs_find_tree_block(root, bytenr, blocksize);
3580                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
3581                         free_extent_buffer(next);
3582
3583                         next = read_tree_block(root, bytenr, blocksize,
3584                                                ptr_gen);
3585                         cond_resched();
3586 #if 0
3587                         /*
3588                          * this is a debugging check and can go away
3589                          * the ref should never go all the way down to 1
3590                          * at this point
3591                          */
3592                         ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
3593                                                 &refs);
3594                         BUG_ON(ret);
3595                         WARN_ON(refs != 1);
3596 #endif
3597                 }
3598                 WARN_ON(*level <= 0);
3599                 if (path->nodes[*level-1])
3600                         free_extent_buffer(path->nodes[*level-1]);
3601                 path->nodes[*level-1] = next;
3602                 *level = btrfs_header_level(next);
3603                 path->slots[*level] = 0;
3604                 cond_resched();
3605         }
3606 out:
3607         WARN_ON(*level < 0);
3608         WARN_ON(*level >= BTRFS_MAX_LEVEL);
3609
3610         if (path->nodes[*level] == root->node) {
3611                 parent = path->nodes[*level];
3612                 bytenr = path->nodes[*level]->start;
3613         } else {
3614                 parent = path->nodes[*level + 1];
3615                 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
3616         }
3617
3618         blocksize = btrfs_level_size(root, *level);
3619         root_owner = btrfs_header_owner(parent);
3620         root_gen = btrfs_header_generation(parent);
3621
3622         ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
3623                                   parent->start, root_owner, root_gen,
3624                                   *level, 1);
3625         free_extent_buffer(path->nodes[*level]);
3626         path->nodes[*level] = NULL;
3627         *level += 1;
3628         BUG_ON(ret);
3629
3630         cond_resched();
3631         return 0;
3632 }
3633
3634 /*
3635  * helper function for drop_subtree, this function is similar to
3636  * walk_down_tree. The main difference is that it checks reference
3637  * counts while tree blocks are locked.
3638  */
3639 static int noinline walk_down_subtree(struct btrfs_trans_handle *trans,
3640                                       struct btrfs_root *root,
3641                                       struct btrfs_path *path, int *level)
3642 {
3643         struct extent_buffer *next;
3644         struct extent_buffer *cur;
3645         struct extent_buffer *parent;
3646         u64 bytenr;
3647         u64 ptr_gen;
3648         u32 blocksize;
3649         u32 refs;
3650         int ret;
3651
3652         cur = path->nodes[*level];
3653         ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
3654                                       &refs);
3655         BUG_ON(ret);
3656         if (refs > 1)
3657                 goto out;
3658
3659         while (*level >= 0) {
3660                 cur = path->nodes[*level];
3661                 if (*level == 0) {
3662                         ret = btrfs_drop_leaf_ref(trans, root, cur);
3663                         BUG_ON(ret);
3664                         clean_tree_block(trans, root, cur);
3665                         break;
3666                 }
3667                 if (path->slots[*level] >= btrfs_header_nritems(cur)) {
3668                         clean_tree_block(trans, root, cur);
3669                         break;
3670                 }
3671
3672                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
3673                 blocksize = btrfs_level_size(root, *level - 1);
3674                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
3675
3676                 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
3677                 btrfs_tree_lock(next);
3678
3679                 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
3680                                               &refs);
3681                 BUG_ON(ret);
3682                 if (refs > 1) {
3683                         parent = path->nodes[*level];
3684                         ret = btrfs_free_extent(trans, root, bytenr,
3685                                         blocksize, parent->start,
3686                                         btrfs_header_owner(parent),
3687                                         btrfs_header_generation(parent),
3688                                         *level - 1, 1);
3689                         BUG_ON(ret);
3690                         path->slots[*level]++;
3691                         btrfs_tree_unlock(next);
3692                         free_extent_buffer(next);
3693                         continue;
3694                 }
3695
3696                 *level = btrfs_header_level(next);
3697                 path->nodes[*level] = next;
3698                 path->slots[*level] = 0;
3699                 path->locks[*level] = 1;
3700                 cond_resched();
3701         }
3702 out:
3703         parent = path->nodes[*level + 1];
3704         bytenr = path->nodes[*level]->start;
3705         blocksize = path->nodes[*level]->len;
3706
3707         ret = btrfs_free_extent(trans, root, bytenr, blocksize,
3708                         parent->start, btrfs_header_owner(parent),
3709                         btrfs_header_generation(parent), *level, 1);
3710         BUG_ON(ret);
3711
3712         if (path->locks[*level]) {
3713                 btrfs_tree_unlock(path->nodes[*level]);
3714                 path->locks[*level] = 0;
3715         }
3716         free_extent_buffer(path->nodes[*level]);
3717         path->nodes[*level] = NULL;
3718         *level += 1;
3719         cond_resched();
3720         return 0;
3721 }
3722
3723 /*
3724  * helper for dropping snapshots.  This walks back up the tree in the path
3725  * to find the first node higher up where we haven't yet gone through
3726  * all the slots
3727  */
3728 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3729                                  struct btrfs_root *root,
3730                                  struct btrfs_path *path,
3731                                  int *level, int max_level)
3732 {
3733         u64 root_owner;
3734         u64 root_gen;
3735         struct btrfs_root_item *root_item = &root->root_item;
3736         int i;
3737         int slot;
3738         int ret;
3739
3740         for (i = *level; i < max_level && path->nodes[i]; i++) {
3741                 slot = path->slots[i];
3742                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3743                         struct extent_buffer *node;
3744                         struct btrfs_disk_key disk_key;
3745                         node = path->nodes[i];
3746                         path->slots[i]++;
3747                         *level = i;
3748                         WARN_ON(*level == 0);
3749                         btrfs_node_key(node, &disk_key, path->slots[i]);
3750                         memcpy(&root_item->drop_progress,
3751                                &disk_key, sizeof(disk_key));
3752                         root_item->drop_level = i;
3753                         return 0;
3754                 } else {
3755                         struct extent_buffer *parent;
3756                         if (path->nodes[*level] == root->node)
3757                                 parent = path->nodes[*level];
3758                         else
3759                                 parent = path->nodes[*level + 1];
3760
3761                         root_owner = btrfs_header_owner(parent);
3762                         root_gen = btrfs_header_generation(parent);
3763
3764                         clean_tree_block(trans, root, path->nodes[*level]);
3765                         ret = btrfs_free_extent(trans, root,
3766                                                 path->nodes[*level]->start,
3767                                                 path->nodes[*level]->len,
3768                                                 parent->start, root_owner,
3769                                                 root_gen, *level, 1);
3770                         BUG_ON(ret);
3771                         if (path->locks[*level]) {
3772                                 btrfs_tree_unlock(path->nodes[*level]);
3773                                 path->locks[*level] = 0;
3774                         }
3775                         free_extent_buffer(path->nodes[*level]);
3776                         path->nodes[*level] = NULL;
3777                         *level = i + 1;
3778                 }
3779         }
3780         return 1;
3781 }
3782
3783 /*
3784  * drop the reference count on the tree rooted at 'snap'.  This traverses
3785  * the tree freeing any blocks that have a ref count of zero after being
3786  * decremented.
3787  */
3788 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3789                         *root)
3790 {
3791         int ret = 0;
3792         int wret;
3793         int level;
3794         struct btrfs_path *path;
3795         int i;
3796         int orig_level;
3797         struct btrfs_root_item *root_item = &root->root_item;
3798
3799         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3800         path = btrfs_alloc_path();
3801         BUG_ON(!path);
3802
3803         level = btrfs_header_level(root->node);
3804         orig_level = level;
3805         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3806                 path->nodes[level] = root->node;
3807                 extent_buffer_get(root->node);
3808                 path->slots[level] = 0;
3809         } else {
3810                 struct btrfs_key key;
3811                 struct btrfs_disk_key found_key;
3812                 struct extent_buffer *node;
3813
3814                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3815                 level = root_item->drop_level;
3816                 path->lowest_level = level;
3817                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3818                 if (wret < 0) {
3819                         ret = wret;
3820                         goto out;
3821                 }
3822                 node = path->nodes[level];
3823                 btrfs_node_key(node, &found_key, path->slots[level]);
3824                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3825                                sizeof(found_key)));
3826                 /*
3827                  * unlock our path, this is safe because only this
3828                  * function is allowed to delete this snapshot
3829                  */
3830                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
3831                         if (path->nodes[i] && path->locks[i]) {
3832                                 path->locks[i] = 0;
3833                                 btrfs_tree_unlock(path->nodes[i]);
3834                         }
3835                 }
3836         }
3837         while(1) {
3838                 wret = walk_down_tree(trans, root, path, &level);
3839                 if (wret > 0)
3840                         break;
3841                 if (wret < 0)
3842                         ret = wret;
3843
3844                 wret = walk_up_tree(trans, root, path, &level,
3845                                     BTRFS_MAX_LEVEL);
3846                 if (wret > 0)
3847                         break;
3848                 if (wret < 0)
3849                         ret = wret;
3850                 if (trans->transaction->in_commit) {
3851                         ret = -EAGAIN;
3852                         break;
3853                 }
3854                 atomic_inc(&root->fs_info->throttle_gen);
3855                 wake_up(&root->fs_info->transaction_throttle);
3856         }
3857         for (i = 0; i <= orig_level; i++) {
3858                 if (path->nodes[i]) {
3859                         free_extent_buffer(path->nodes[i]);
3860                         path->nodes[i] = NULL;
3861                 }
3862         }
3863 out:
3864         btrfs_free_path(path);
3865         return ret;
3866 }
3867
3868 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3869                         struct btrfs_root *root,
3870                         struct extent_buffer *node,
3871                         struct extent_buffer *parent)
3872 {
3873         struct btrfs_path *path;
3874         int level;
3875         int parent_level;
3876         int ret = 0;
3877         int wret;
3878
3879         path = btrfs_alloc_path();
3880         BUG_ON(!path);
3881
3882         BUG_ON(!btrfs_tree_locked(parent));
3883         parent_level = btrfs_header_level(parent);
3884         extent_buffer_get(parent);
3885         path->nodes[parent_level] = parent;
3886         path->slots[parent_level] = btrfs_header_nritems(parent);
3887
3888         BUG_ON(!btrfs_tree_locked(node));
3889         level = btrfs_header_level(node);
3890         extent_buffer_get(node);
3891         path->nodes[level] = node;
3892         path->slots[level] = 0;
3893
3894         while (1) {
3895                 wret = walk_down_subtree(trans, root, path, &level);
3896                 if (wret < 0)
3897                         ret = wret;
3898                 if (wret != 0)
3899                         break;
3900
3901                 wret = walk_up_tree(trans, root, path, &level, parent_level);
3902                 if (wret < 0)
3903                         ret = wret;
3904                 if (wret != 0)
3905                         break;
3906         }
3907
3908         btrfs_free_path(path);
3909         return ret;
3910 }
3911
3912 static unsigned long calc_ra(unsigned long start, unsigned long last,
3913                              unsigned long nr)
3914 {
3915         return min(last, start + nr - 1);
3916 }
3917
3918 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
3919                                          u64 len)
3920 {
3921         u64 page_start;
3922         u64 page_end;
3923         unsigned long first_index;
3924         unsigned long last_index;
3925         unsigned long i;
3926         struct page *page;
3927         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3928         struct file_ra_state *ra;
3929         struct btrfs_ordered_extent *ordered;
3930         unsigned int total_read = 0;
3931         unsigned int total_dirty = 0;
3932         int ret = 0;
3933
3934         ra = kzalloc(sizeof(*ra), GFP_NOFS);
3935
3936         mutex_lock(&inode->i_mutex);
3937         first_index = start >> PAGE_CACHE_SHIFT;
3938         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3939
3940         /* make sure the dirty trick played by the caller work */
3941         ret = invalidate_inode_pages2_range(inode->i_mapping,
3942                                             first_index, last_index);
3943         if (ret)
3944                 goto out_unlock;
3945
3946         file_ra_state_init(ra, inode->i_mapping);
3947
3948         for (i = first_index ; i <= last_index; i++) {
3949                 if (total_read % ra->ra_pages == 0) {
3950                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3951                                        calc_ra(i, last_index, ra->ra_pages));
3952                 }
3953                 total_read++;
3954 again:
3955                 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
3956                         BUG_ON(1);
3957                 page = grab_cache_page(inode->i_mapping, i);
3958                 if (!page) {
3959                         ret = -ENOMEM;
3960                         goto out_unlock;
3961                 }
3962                 if (!PageUptodate(page)) {
3963                         btrfs_readpage(NULL, page);
3964                         lock_page(page);
3965                         if (!PageUptodate(page)) {
3966                                 unlock_page(page);
3967                                 page_cache_release(page);
3968                                 ret = -EIO;
3969                                 goto out_unlock;
3970                         }
3971                 }
3972                 wait_on_page_writeback(page);
3973
3974                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3975                 page_end = page_start + PAGE_CACHE_SIZE - 1;
3976                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3977
3978                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3979                 if (ordered) {
3980                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3981                         unlock_page(page);
3982                         page_cache_release(page);
3983                         btrfs_start_ordered_extent(inode, ordered, 1);
3984                         btrfs_put_ordered_extent(ordered);
3985                         goto again;
3986                 }
3987                 set_page_extent_mapped(page);
3988
3989                 btrfs_set_extent_delalloc(inode, page_start, page_end);
3990                 if (i == first_index)
3991                         set_extent_bits(io_tree, page_start, page_end,
3992                                         EXTENT_BOUNDARY, GFP_NOFS);
3993
3994                 set_page_dirty(page);
3995                 total_dirty++;
3996
3997                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3998                 unlock_page(page);
3999                 page_cache_release(page);
4000         }
4001
4002 out_unlock:
4003         kfree(ra);
4004         mutex_unlock(&inode->i_mutex);
4005         balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
4006         return ret;
4007 }
4008
4009 static int noinline relocate_data_extent(struct inode *reloc_inode,
4010                                          struct btrfs_key *extent_key,
4011                                          u64 offset)
4012 {
4013         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
4014         struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
4015         struct extent_map *em;
4016         u64 start = extent_key->objectid - offset;
4017         u64 end = start + extent_key->offset - 1;
4018
4019         em = alloc_extent_map(GFP_NOFS);
4020         BUG_ON(!em || IS_ERR(em));
4021
4022         em->start = start;
4023         em->len = extent_key->offset;
4024         em->block_len = extent_key->offset;
4025         em->block_start = extent_key->objectid;
4026         em->bdev = root->fs_info->fs_devices->latest_bdev;
4027         set_bit(EXTENT_FLAG_PINNED, &em->flags);
4028
4029         /* setup extent map to cheat btrfs_readpage */
4030         lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
4031         while (1) {
4032                 int ret;
4033                 spin_lock(&em_tree->lock);
4034                 ret = add_extent_mapping(em_tree, em);
4035                 spin_unlock(&em_tree->lock);
4036                 if (ret != -EEXIST) {
4037                         free_extent_map(em);
4038                         break;
4039                 }
4040                 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
4041         }
4042         unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
4043
4044         return relocate_inode_pages(reloc_inode, start, extent_key->offset);
4045 }
4046
4047 struct btrfs_ref_path {
4048         u64 extent_start;
4049         u64 nodes[BTRFS_MAX_LEVEL];
4050         u64 root_objectid;
4051         u64 root_generation;
4052         u64 owner_objectid;
4053         u32 num_refs;
4054         int lowest_level;
4055         int current_level;
4056         int shared_level;
4057
4058         struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
4059         u64 new_nodes[BTRFS_MAX_LEVEL];
4060 };
4061
4062 struct disk_extent {
4063         u64 ram_bytes;
4064         u64 disk_bytenr;
4065         u64 disk_num_bytes;
4066         u64 offset;
4067         u64 num_bytes;
4068         u8 compression;
4069         u8 encryption;
4070         u16 other_encoding;
4071 };
4072
4073 static int is_cowonly_root(u64 root_objectid)
4074 {
4075         if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
4076             root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
4077             root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
4078             root_objectid == BTRFS_DEV_TREE_OBJECTID ||
4079             root_objectid == BTRFS_TREE_LOG_OBJECTID)
4080                 return 1;
4081         return 0;
4082 }
4083
4084 static int noinline __next_ref_path(struct btrfs_trans_handle *trans,
4085                                     struct btrfs_root *extent_root,
4086                                     struct btrfs_ref_path *ref_path,
4087                                     int first_time)
4088 {
4089         struct extent_buffer *leaf;
4090         struct btrfs_path *path;
4091         struct btrfs_extent_ref *ref;
4092         struct btrfs_key key;
4093         struct btrfs_key found_key;
4094         u64 bytenr;
4095         u32 nritems;
4096         int level;
4097         int ret = 1;
4098
4099         path = btrfs_alloc_path();
4100         if (!path)
4101                 return -ENOMEM;
4102
4103         if (first_time) {
4104                 ref_path->lowest_level = -1;
4105                 ref_path->current_level = -1;
4106                 ref_path->shared_level = -1;
4107                 goto walk_up;
4108         }
4109 walk_down:
4110         level = ref_path->current_level - 1;
4111         while (level >= -1) {
4112                 u64 parent;
4113                 if (level < ref_path->lowest_level)
4114                         break;
4115
4116                 if (level >= 0) {
4117                         bytenr = ref_path->nodes[level];
4118                 } else {
4119                         bytenr = ref_path->extent_start;
4120                 }
4121                 BUG_ON(bytenr == 0);
4122
4123                 parent = ref_path->nodes[level + 1];
4124                 ref_path->nodes[level + 1] = 0;
4125                 ref_path->current_level = level;
4126                 BUG_ON(parent == 0);
4127
4128                 key.objectid = bytenr;
4129                 key.offset = parent + 1;
4130                 key.type = BTRFS_EXTENT_REF_KEY;
4131
4132                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
4133                 if (ret < 0)
4134                         goto out;
4135                 BUG_ON(ret == 0);
4136
4137                 leaf = path->nodes[0];
4138                 nritems = btrfs_header_nritems(leaf);
4139                 if (path->slots[0] >= nritems) {
4140                         ret = btrfs_next_leaf(extent_root, path);
4141                         if (ret < 0)
4142                                 goto out;
4143                         if (ret > 0)
4144                                 goto next;
4145                         leaf = path->nodes[0];
4146                 }
4147
4148                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4149                 if (found_key.objectid == bytenr &&
4150                     found_key.type == BTRFS_EXTENT_REF_KEY) {
4151                         if (level < ref_path->shared_level)
4152                                 ref_path->shared_level = level;
4153                         goto found;
4154                 }
4155 next:
4156                 level--;
4157                 btrfs_release_path(extent_root, path);
4158                 cond_resched();
4159         }
4160         /* reached lowest level */
4161         ret = 1;
4162         goto out;
4163 walk_up:
4164         level = ref_path->current_level;
4165         while (level < BTRFS_MAX_LEVEL - 1) {
4166                 u64 ref_objectid;
4167                 if (level >= 0) {
4168                         bytenr = ref_path->nodes[level];
4169                 } else {
4170                         bytenr = ref_path->extent_start;
4171                 }
4172                 BUG_ON(bytenr == 0);
4173
4174                 key.objectid = bytenr;
4175                 key.offset = 0;
4176                 key.type = BTRFS_EXTENT_REF_KEY;
4177
4178                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
4179                 if (ret < 0)
4180                         goto out;
4181
4182                 leaf = path->nodes[0];
4183                 nritems = btrfs_header_nritems(leaf);
4184                 if (path->slots[0] >= nritems) {
4185                         ret = btrfs_next_leaf(extent_root, path);
4186                         if (ret < 0)
4187                                 goto out;
4188                         if (ret > 0) {
4189                                 /* the extent was freed by someone */
4190                                 if (ref_path->lowest_level == level)
4191                                         goto out;
4192                                 btrfs_release_path(extent_root, path);
4193                                 goto walk_down;
4194                         }
4195                         leaf = path->nodes[0];
4196                 }
4197
4198                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4199                 if (found_key.objectid != bytenr ||
4200                                 found_key.type != BTRFS_EXTENT_REF_KEY) {
4201                         /* the extent was freed by someone */
4202                         if (ref_path->lowest_level == level) {
4203                                 ret = 1;
4204                                 goto out;
4205                         }
4206                         btrfs_release_path(extent_root, path);
4207                         goto walk_down;
4208                 }
4209 found:
4210                 ref = btrfs_item_ptr(leaf, path->slots[0],
4211                                 struct btrfs_extent_ref);
4212                 ref_objectid = btrfs_ref_objectid(leaf, ref);
4213                 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
4214                         if (first_time) {
4215                                 level = (int)ref_objectid;
4216                                 BUG_ON(level >= BTRFS_MAX_LEVEL);
4217                                 ref_path->lowest_level = level;
4218                                 ref_path->current_level = level;
4219                                 ref_path->nodes[level] = bytenr;
4220                         } else {
4221                                 WARN_ON(ref_objectid != level);
4222                         }
4223                 } else {
4224                         WARN_ON(level != -1);
4225                 }
4226                 first_time = 0;
4227
4228                 if (ref_path->lowest_level == level) {
4229                         ref_path->owner_objectid = ref_objectid;
4230                         ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
4231                 }
4232
4233                 /*
4234                  * the block is tree root or the block isn't in reference
4235                  * counted tree.
4236                  */
4237                 if (found_key.objectid == found_key.offset ||
4238                     is_cowonly_root(btrfs_ref_root(leaf, ref))) {
4239                         ref_path->root_objectid = btrfs_ref_root(leaf, ref);
4240                         ref_path->root_generation =
4241                                 btrfs_ref_generation(leaf, ref);
4242                         if (level < 0) {
4243                                 /* special reference from the tree log */
4244                                 ref_path->nodes[0] = found_key.offset;
4245                                 ref_path->current_level = 0;
4246                         }
4247                         ret = 0;
4248                         goto out;
4249                 }
4250
4251                 level++;
4252                 BUG_ON(ref_path->nodes[level] != 0);
4253                 ref_path->nodes[level] = found_key.offset;
4254                 ref_path->current_level = level;
4255
4256                 /*
4257                  * the reference was created in the running transaction,
4258                  * no need to continue walking up.
4259                  */
4260                 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
4261                         ref_path->root_objectid = btrfs_ref_root(leaf, ref);
4262                         ref_path->root_generation =
4263                                 btrfs_ref_generation(leaf, ref);
4264                         ret = 0;
4265                         goto out;
4266                 }
4267
4268                 btrfs_release_path(extent_root, path);
4269                 cond_resched();
4270         }
4271         /* reached max tree level, but no tree root found. */
4272         BUG();
4273 out:
4274         btrfs_free_path(path);
4275         return ret;
4276 }
4277
4278 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
4279                                 struct btrfs_root *extent_root,
4280                                 struct btrfs_ref_path *ref_path,
4281                                 u64 extent_start)
4282 {
4283         memset(ref_path, 0, sizeof(*ref_path));
4284         ref_path->extent_start = extent_start;
4285
4286         return __next_ref_path(trans, extent_root, ref_path, 1);
4287 }
4288
4289 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
4290                                struct btrfs_root *extent_root,
4291                                struct btrfs_ref_path *ref_path)
4292 {
4293         return __next_ref_path(trans, extent_root, ref_path, 0);
4294 }
4295
4296 static int noinline get_new_locations(struct inode *reloc_inode,
4297                                       struct btrfs_key *extent_key,
4298                                       u64 offset, int no_fragment,
4299                                       struct disk_extent **extents,
4300                                       int *nr_extents)
4301 {
4302         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
4303         struct btrfs_path *path;
4304         struct btrfs_file_extent_item *fi;
4305         struct extent_buffer *leaf;
4306         struct disk_extent *exts = *extents;
4307         struct btrfs_key found_key;
4308         u64 cur_pos;
4309         u64 last_byte;
4310         u32 nritems;
4311         int nr = 0;
4312         int max = *nr_extents;
4313         int ret;
4314
4315         WARN_ON(!no_fragment && *extents);
4316         if (!exts) {
4317                 max = 1;
4318                 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
4319                 if (!exts)
4320                         return -ENOMEM;
4321         }
4322
4323         path = btrfs_alloc_path();
4324         BUG_ON(!path);
4325
4326         cur_pos = extent_key->objectid - offset;
4327         last_byte = extent_key->objectid + extent_key->offset;
4328         ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
4329                                        cur_pos, 0);
4330         if (ret < 0)
4331                 goto out;
4332         if (ret > 0) {
4333                 ret = -ENOENT;
4334                 goto out;
4335         }
4336
4337         while (1) {
4338                 leaf = path->nodes[0];
4339                 nritems = btrfs_header_nritems(leaf);
4340                 if (path->slots[0] >= nritems) {
4341                         ret = btrfs_next_leaf(root, path);
4342                         if (ret < 0)
4343                                 goto out;
4344                         if (ret > 0)
4345                                 break;
4346                         leaf = path->nodes[0];
4347                 }
4348
4349                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4350                 if (found_key.offset != cur_pos ||
4351                     found_key.type != BTRFS_EXTENT_DATA_KEY ||
4352                     found_key.objectid != reloc_inode->i_ino)
4353                         break;
4354
4355                 fi = btrfs_item_ptr(leaf, path->slots[0],
4356                                     struct btrfs_file_extent_item);
4357                 if (btrfs_file_extent_type(leaf, fi) !=
4358                     BTRFS_FILE_EXTENT_REG ||
4359                     btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4360                         break;
4361
4362                 if (nr == max) {
4363                         struct disk_extent *old = exts;
4364                         max *= 2;
4365                         exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
4366                         memcpy(exts, old, sizeof(*exts) * nr);
4367                         if (old != *extents)
4368                                 kfree(old);
4369                 }
4370
4371                 exts[nr].disk_bytenr =
4372                         btrfs_file_extent_disk_bytenr(leaf, fi);
4373                 exts[nr].disk_num_bytes =
4374                         btrfs_file_extent_disk_num_bytes(leaf, fi);
4375                 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
4376                 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4377                 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
4378                 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
4379                 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
4380                 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
4381                                                                            fi);
4382                 BUG_ON(exts[nr].offset > 0);
4383                 BUG_ON(exts[nr].compression || exts[nr].encryption);
4384                 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
4385
4386                 cur_pos += exts[nr].num_bytes;
4387                 nr++;
4388
4389                 if (cur_pos + offset >= last_byte)
4390                         break;
4391
4392                 if (no_fragment) {
4393                         ret = 1;
4394                         goto out;
4395                 }
4396                 path->slots[0]++;
4397         }
4398
4399         WARN_ON(cur_pos + offset > last_byte);
4400         if (cur_pos + offset < last_byte) {
4401                 ret = -ENOENT;
4402                 goto out;
4403         }
4404         ret = 0;
4405 out:
4406         btrfs_free_path(path);
4407         if (ret) {
4408                 if (exts != *extents)
4409                         kfree(exts);
4410         } else {
4411                 *extents = exts;
4412                 *nr_extents = nr;
4413         }
4414         return ret;
4415 }
4416
4417 static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
4418                                         struct btrfs_root *root,
4419                                         struct btrfs_path *path,
4420                                         struct btrfs_key *extent_key,
4421                                         struct btrfs_key *leaf_key,
4422                                         struct btrfs_ref_path *ref_path,
4423                                         struct disk_extent *new_extents,
4424                                         int nr_extents)
4425 {
4426         struct extent_buffer *leaf;
4427         struct btrfs_file_extent_item *fi;
4428         struct inode *inode = NULL;
4429         struct btrfs_key key;
4430         u64 lock_start = 0;
4431         u64 lock_end = 0;
4432         u64 num_bytes;
4433         u64 ext_offset;
4434         u64 first_pos;
4435         u32 nritems;
4436         int nr_scaned = 0;
4437         int extent_locked = 0;
4438         int extent_type;
4439         int ret;
4440
4441         memcpy(&key, leaf_key, sizeof(key));
4442         first_pos = INT_LIMIT(loff_t) - extent_key->offset;
4443         if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
4444                 if (key.objectid < ref_path->owner_objectid ||
4445                     (key.objectid == ref_path->owner_objectid &&
4446                      key.type < BTRFS_EXTENT_DATA_KEY)) {
4447                         key.objectid = ref_path->owner_objectid;
4448                         key.type = BTRFS_EXTENT_DATA_KEY;
4449                         key.offset = 0;
4450                 }
4451         }
4452
4453         while (1) {
4454                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4455                 if (ret < 0)
4456                         goto out;
4457
4458                 leaf = path->nodes[0];
4459                 nritems = btrfs_header_nritems(leaf);
4460 next:
4461                 if (extent_locked && ret > 0) {
4462                         /*
4463                          * the file extent item was modified by someone
4464                          * before the extent got locked.
4465                          */
4466                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4467                                       lock_end, GFP_NOFS);
4468                         extent_locked = 0;
4469                 }
4470
4471                 if (path->slots[0] >= nritems) {
4472                         if (++nr_scaned > 2)
4473                                 break;
4474
4475                         BUG_ON(extent_locked);
4476                         ret = btrfs_next_leaf(root, path);
4477                         if (ret < 0)
4478                                 goto out;
4479                         if (ret > 0)
4480                                 break;
4481                         leaf = path->nodes[0];
4482                         nritems = btrfs_header_nritems(leaf);
4483                 }
4484
4485                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4486
4487                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
4488                         if ((key.objectid > ref_path->owner_objectid) ||
4489                             (key.objectid == ref_path->owner_objectid &&
4490                              key.type > BTRFS_EXTENT_DATA_KEY) ||
4491                             (key.offset >= first_pos + extent_key->offset))
4492                                 break;
4493                 }
4494
4495                 if (inode && key.objectid != inode->i_ino) {
4496                         BUG_ON(extent_locked);
4497                         btrfs_release_path(root, path);
4498                         mutex_unlock(&inode->i_mutex);
4499                         iput(inode);
4500                         inode = NULL;
4501                         continue;
4502                 }
4503
4504                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
4505                         path->slots[0]++;
4506                         ret = 1;
4507                         goto next;
4508                 }
4509                 fi = btrfs_item_ptr(leaf, path->slots[0],
4510                                     struct btrfs_file_extent_item);
4511                 extent_type = btrfs_file_extent_type(leaf, fi);
4512                 if ((extent_type != BTRFS_FILE_EXTENT_REG &&
4513                      extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
4514                     (btrfs_file_extent_disk_bytenr(leaf, fi) !=
4515                      extent_key->objectid)) {
4516                         path->slots[0]++;
4517                         ret = 1;
4518                         goto next;
4519                 }
4520
4521                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4522                 ext_offset = btrfs_file_extent_offset(leaf, fi);
4523
4524                 if (first_pos > key.offset - ext_offset)
4525                         first_pos = key.offset - ext_offset;
4526
4527                 if (!extent_locked) {
4528                         lock_start = key.offset;
4529                         lock_end = lock_start + num_bytes - 1;
4530                 } else {
4531                         if (lock_start > key.offset ||
4532                             lock_end + 1 < key.offset + num_bytes) {
4533                                 unlock_extent(&BTRFS_I(inode)->io_tree,
4534                                               lock_start, lock_end, GFP_NOFS);
4535                                 extent_locked = 0;
4536                         }
4537                 }
4538
4539                 if (!inode) {
4540                         btrfs_release_path(root, path);
4541
4542                         inode = btrfs_iget_locked(root->fs_info->sb,
4543                                                   key.objectid, root);
4544                         if (inode->i_state & I_NEW) {
4545                                 BTRFS_I(inode)->root = root;
4546                                 BTRFS_I(inode)->location.objectid =
4547                                         key.objectid;
4548                                 BTRFS_I(inode)->location.type =
4549                                         BTRFS_INODE_ITEM_KEY;
4550                                 BTRFS_I(inode)->location.offset = 0;
4551                                 btrfs_read_locked_inode(inode);
4552                                 unlock_new_inode(inode);
4553                         }
4554                         /*
4555                          * some code call btrfs_commit_transaction while
4556                          * holding the i_mutex, so we can't use mutex_lock
4557                          * here.
4558                          */
4559                         if (is_bad_inode(inode) ||
4560                             !mutex_trylock(&inode->i_mutex)) {
4561                                 iput(inode);
4562                                 inode = NULL;
4563                                 key.offset = (u64)-1;
4564                                 goto skip;
4565                         }
4566                 }
4567
4568                 if (!extent_locked) {
4569                         struct btrfs_ordered_extent *ordered;
4570
4571                         btrfs_release_path(root, path);
4572
4573                         lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4574                                     lock_end, GFP_NOFS);
4575                         ordered = btrfs_lookup_first_ordered_extent(inode,
4576                                                                     lock_end);
4577                         if (ordered &&
4578                             ordered->file_offset <= lock_end &&
4579                             ordered->file_offset + ordered->len > lock_start) {
4580                                 unlock_extent(&BTRFS_I(inode)->io_tree,
4581                                               lock_start, lock_end, GFP_NOFS);
4582                                 btrfs_start_ordered_extent(inode, ordered, 1);
4583                                 btrfs_put_ordered_extent(ordered);
4584                                 key.offset += num_bytes;
4585                                 goto skip;
4586                         }
4587                         if (ordered)
4588                                 btrfs_put_ordered_extent(ordered);
4589
4590                         extent_locked = 1;
4591                         continue;
4592                 }
4593
4594                 if (nr_extents == 1) {
4595                         /* update extent pointer in place */
4596                         btrfs_set_file_extent_disk_bytenr(leaf, fi,
4597                                                 new_extents[0].disk_bytenr);
4598                         btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4599                                                 new_extents[0].disk_num_bytes);
4600                         btrfs_mark_buffer_dirty(leaf);
4601
4602                         btrfs_drop_extent_cache(inode, key.offset,
4603                                                 key.offset + num_bytes - 1, 0);
4604
4605                         ret = btrfs_inc_extent_ref(trans, root,
4606                                                 new_extents[0].disk_bytenr,
4607                                                 new_extents[0].disk_num_bytes,
4608                                                 leaf->start,
4609                                                 root->root_key.objectid,
4610                                                 trans->transid,
4611                                                 key.objectid);
4612                         BUG_ON(ret);
4613
4614                         ret = btrfs_free_extent(trans, root,
4615                                                 extent_key->objectid,
4616                                                 extent_key->offset,
4617                                                 leaf->start,
4618                                                 btrfs_header_owner(leaf),
4619                                                 btrfs_header_generation(leaf),
4620                                                 key.objectid, 0);
4621                         BUG_ON(ret);
4622
4623                         btrfs_release_path(root, path);
4624                         key.offset += num_bytes;
4625                 } else {
4626                         BUG_ON(1);
4627 #if 0
4628                         u64 alloc_hint;
4629                         u64 extent_len;
4630                         int i;
4631                         /*
4632                          * drop old extent pointer at first, then insert the
4633                          * new pointers one bye one
4634                          */
4635                         btrfs_release_path(root, path);
4636                         ret = btrfs_drop_extents(trans, root, inode, key.offset,
4637                                                  key.offset + num_bytes,
4638                                                  key.offset, &alloc_hint);
4639                         BUG_ON(ret);
4640
4641                         for (i = 0; i < nr_extents; i++) {
4642                                 if (ext_offset >= new_extents[i].num_bytes) {
4643                                         ext_offset -= new_extents[i].num_bytes;
4644                                         continue;
4645                                 }
4646                                 extent_len = min(new_extents[i].num_bytes -
4647                                                  ext_offset, num_bytes);
4648
4649                                 ret = btrfs_insert_empty_item(trans, root,
4650                                                               path, &key,
4651                                                               sizeof(*fi));
4652                                 BUG_ON(ret);
4653
4654                                 leaf = path->nodes[0];
4655                                 fi = btrfs_item_ptr(leaf, path->slots[0],
4656                                                 struct btrfs_file_extent_item);
4657                                 btrfs_set_file_extent_generation(leaf, fi,
4658                                                         trans->transid);
4659                                 btrfs_set_file_extent_type(leaf, fi,
4660                                                         BTRFS_FILE_EXTENT_REG);
4661                                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4662                                                 new_extents[i].disk_bytenr);
4663                                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4664                                                 new_extents[i].disk_num_bytes);
4665                                 btrfs_set_file_extent_ram_bytes(leaf, fi,
4666                                                 new_extents[i].ram_bytes);
4667
4668                                 btrfs_set_file_extent_compression(leaf, fi,
4669                                                 new_extents[i].compression);
4670                                 btrfs_set_file_extent_encryption(leaf, fi,
4671                                                 new_extents[i].encryption);
4672                                 btrfs_set_file_extent_other_encoding(leaf, fi,
4673                                                 new_extents[i].other_encoding);
4674
4675                                 btrfs_set_file_extent_num_bytes(leaf, fi,
4676                                                         extent_len);
4677                                 ext_offset += new_extents[i].offset;
4678                                 btrfs_set_file_extent_offset(leaf, fi,
4679                                                         ext_offset);
4680                                 btrfs_mark_buffer_dirty(leaf);
4681
4682                                 btrfs_drop_extent_cache(inode, key.offset,
4683                                                 key.offset + extent_len - 1, 0);
4684
4685                                 ret = btrfs_inc_extent_ref(trans, root,
4686                                                 new_extents[i].disk_bytenr,
4687                                                 new_extents[i].disk_num_bytes,
4688                                                 leaf->start,
4689                                                 root->root_key.objectid,
4690                                                 trans->transid, key.objectid);
4691                                 BUG_ON(ret);
4692                                 btrfs_release_path(root, path);
4693
4694                                 inode_add_bytes(inode, extent_len);
4695
4696                                 ext_offset = 0;
4697                                 num_bytes -= extent_len;
4698                                 key.offset += extent_len;
4699
4700                                 if (num_bytes == 0)
4701                                         break;
4702                         }
4703                         BUG_ON(i >= nr_extents);
4704 #endif
4705                 }
4706
4707                 if (extent_locked) {
4708                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4709                                       lock_end, GFP_NOFS);
4710                         extent_locked = 0;
4711                 }
4712 skip:
4713                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
4714                     key.offset >= first_pos + extent_key->offset)
4715                         break;
4716
4717                 cond_resched();
4718         }
4719         ret = 0;
4720 out:
4721         btrfs_release_path(root, path);
4722         if (inode) {
4723                 mutex_unlock(&inode->i_mutex);
4724                 if (extent_locked) {
4725                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4726                                       lock_end, GFP_NOFS);
4727                 }
4728                 iput(inode);
4729         }
4730         return ret;
4731 }
4732
4733 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
4734                                struct btrfs_root *root,
4735                                struct extent_buffer *buf, u64 orig_start)
4736 {
4737         int level;
4738         int ret;
4739
4740         BUG_ON(btrfs_header_generation(buf) != trans->transid);
4741         BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
4742
4743         level = btrfs_header_level(buf);
4744         if (level == 0) {
4745                 struct btrfs_leaf_ref *ref;
4746                 struct btrfs_leaf_ref *orig_ref;
4747
4748                 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
4749                 if (!orig_ref)
4750                         return -ENOENT;
4751
4752                 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
4753                 if (!ref) {
4754                         btrfs_free_leaf_ref(root, orig_ref);
4755                         return -ENOMEM;
4756                 }
4757
4758                 ref->nritems = orig_ref->nritems;
4759                 memcpy(ref->extents, orig_ref->extents,
4760                         sizeof(ref->extents[0]) * ref->nritems);
4761
4762                 btrfs_free_leaf_ref(root, orig_ref);
4763
4764                 ref->root_gen = trans->transid;
4765                 ref->bytenr = buf->start;
4766                 ref->owner = btrfs_header_owner(buf);
4767                 ref->generation = btrfs_header_generation(buf);
4768                 ret = btrfs_add_leaf_ref(root, ref, 0);
4769                 WARN_ON(ret);
4770                 btrfs_free_leaf_ref(root, ref);
4771         }
4772         return 0;
4773 }
4774
4775 static int noinline invalidate_extent_cache(struct btrfs_root *root,
4776                                         struct extent_buffer *leaf,
4777                                         struct btrfs_block_group_cache *group,
4778                                         struct btrfs_root *target_root)
4779 {
4780         struct btrfs_key key;
4781         struct inode *inode = NULL;
4782         struct btrfs_file_extent_item *fi;
4783         u64 num_bytes;
4784         u64 skip_objectid = 0;
4785         u32 nritems;
4786         u32 i;
4787
4788         nritems = btrfs_header_nritems(leaf);
4789         for (i = 0; i < nritems; i++) {
4790                 btrfs_item_key_to_cpu(leaf, &key, i);
4791                 if (key.objectid == skip_objectid ||
4792                     key.type != BTRFS_EXTENT_DATA_KEY)
4793                         continue;
4794                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4795                 if (btrfs_file_extent_type(leaf, fi) ==
4796                     BTRFS_FILE_EXTENT_INLINE)
4797                         continue;
4798                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4799                         continue;
4800                 if (!inode || inode->i_ino != key.objectid) {
4801                         iput(inode);
4802                         inode = btrfs_ilookup(target_root->fs_info->sb,
4803                                               key.objectid, target_root, 1);
4804                 }
4805                 if (!inode) {
4806                         skip_objectid = key.objectid;
4807                         continue;
4808                 }
4809                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4810
4811                 lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4812                             key.offset + num_bytes - 1, GFP_NOFS);
4813                 btrfs_drop_extent_cache(inode, key.offset,
4814                                         key.offset + num_bytes - 1, 1);
4815                 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4816                               key.offset + num_bytes - 1, GFP_NOFS);
4817                 cond_resched();
4818         }
4819         iput(inode);
4820         return 0;
4821 }
4822
4823 static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
4824                                         struct btrfs_root *root,
4825                                         struct extent_buffer *leaf,
4826                                         struct btrfs_block_group_cache *group,
4827                                         struct inode *reloc_inode)
4828 {
4829         struct btrfs_key key;
4830         struct btrfs_key extent_key;
4831         struct btrfs_file_extent_item *fi;
4832         struct btrfs_leaf_ref *ref;
4833         struct disk_extent *new_extent;
4834         u64 bytenr;
4835         u64 num_bytes;
4836         u32 nritems;
4837         u32 i;
4838         int ext_index;
4839         int nr_extent;
4840         int ret;
4841
4842         new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
4843         BUG_ON(!new_extent);
4844
4845         ref = btrfs_lookup_leaf_ref(root, leaf->start);
4846         BUG_ON(!ref);
4847
4848         ext_index = -1;
4849         nritems = btrfs_header_nritems(leaf);
4850         for (i = 0; i < nritems; i++) {
4851                 btrfs_item_key_to_cpu(leaf, &key, i);
4852                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
4853                         continue;
4854                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4855                 if (btrfs_file_extent_type(leaf, fi) ==
4856                     BTRFS_FILE_EXTENT_INLINE)
4857                         continue;
4858                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4859                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4860                 if (bytenr == 0)
4861                         continue;
4862
4863                 ext_index++;
4864                 if (bytenr >= group->key.objectid + group->key.offset ||
4865                     bytenr + num_bytes <= group->key.objectid)
4866                         continue;
4867
4868                 extent_key.objectid = bytenr;
4869                 extent_key.offset = num_bytes;
4870                 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4871                 nr_extent = 1;
4872                 ret = get_new_locations(reloc_inode, &extent_key,
4873                                         group->key.objectid, 1,
4874                                         &new_extent, &nr_extent);
4875                 if (ret > 0)
4876                         continue;
4877                 BUG_ON(ret < 0);
4878
4879                 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
4880                 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
4881                 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
4882                 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
4883
4884                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4885                                                 new_extent->disk_bytenr);
4886                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4887                                                 new_extent->disk_num_bytes);
4888                 btrfs_mark_buffer_dirty(leaf);
4889
4890                 ret = btrfs_inc_extent_ref(trans, root,
4891                                         new_extent->disk_bytenr,
4892                                         new_extent->disk_num_bytes,
4893                                         leaf->start,
4894                                         root->root_key.objectid,
4895                                         trans->transid, key.objectid);
4896                 BUG_ON(ret);
4897                 ret = btrfs_free_extent(trans, root,
4898                                         bytenr, num_bytes, leaf->start,
4899                                         btrfs_header_owner(leaf),
4900                                         btrfs_header_generation(leaf),
4901                                         key.objectid, 0);
4902                 BUG_ON(ret);
4903                 cond_resched();
4904         }
4905         kfree(new_extent);
4906         BUG_ON(ext_index + 1 != ref->nritems);
4907         btrfs_free_leaf_ref(root, ref);
4908         return 0;
4909 }
4910
4911 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
4912                           struct btrfs_root *root)
4913 {
4914         struct btrfs_root *reloc_root;
4915         int ret;
4916
4917         if (root->reloc_root) {
4918                 reloc_root = root->reloc_root;
4919                 root->reloc_root = NULL;
4920                 list_add(&reloc_root->dead_list,
4921                          &root->fs_info->dead_reloc_roots);
4922
4923                 btrfs_set_root_bytenr(&reloc_root->root_item,
4924                                       reloc_root->node->start);
4925                 btrfs_set_root_level(&root->root_item,
4926                                      btrfs_header_level(reloc_root->node));
4927                 memset(&reloc_root->root_item.drop_progress, 0,
4928                         sizeof(struct btrfs_disk_key));
4929                 reloc_root->root_item.drop_level = 0;
4930
4931                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4932                                         &reloc_root->root_key,
4933                                         &reloc_root->root_item);
4934                 BUG_ON(ret);
4935         }
4936         return 0;
4937 }
4938
4939 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
4940 {
4941         struct btrfs_trans_handle *trans;
4942         struct btrfs_root *reloc_root;
4943         struct btrfs_root *prev_root = NULL;
4944         struct list_head dead_roots;
4945         int ret;
4946         unsigned long nr;
4947
4948         INIT_LIST_HEAD(&dead_roots);
4949         list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
4950
4951         while (!list_empty(&dead_roots)) {
4952                 reloc_root = list_entry(dead_roots.prev,
4953                                         struct btrfs_root, dead_list);
4954                 list_del_init(&reloc_root->dead_list);
4955
4956                 BUG_ON(reloc_root->commit_root != NULL);
4957                 while (1) {
4958                         trans = btrfs_join_transaction(root, 1);
4959                         BUG_ON(!trans);
4960
4961                         mutex_lock(&root->fs_info->drop_mutex);
4962                         ret = btrfs_drop_snapshot(trans, reloc_root);
4963                         if (ret != -EAGAIN)
4964                                 break;
4965                         mutex_unlock(&root->fs_info->drop_mutex);
4966
4967                         nr = trans->blocks_used;
4968                         ret = btrfs_end_transaction(trans, root);
4969                         BUG_ON(ret);
4970                         btrfs_btree_balance_dirty(root, nr);
4971                 }
4972
4973                 free_extent_buffer(reloc_root->node);
4974
4975                 ret = btrfs_del_root(trans, root->fs_info->tree_root,
4976                                      &reloc_root->root_key);
4977                 BUG_ON(ret);
4978                 mutex_unlock(&root->fs_info->drop_mutex);
4979
4980                 nr = trans->blocks_used;
4981                 ret = btrfs_end_transaction(trans, root);
4982                 BUG_ON(ret);
4983                 btrfs_btree_balance_dirty(root, nr);
4984
4985                 kfree(prev_root);
4986                 prev_root = reloc_root;
4987         }
4988         if (prev_root) {
4989                 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
4990                 kfree(prev_root);
4991         }
4992         return 0;
4993 }
4994
4995 int btrfs_add_dead_reloc_root(struct btrfs_root *root)
4996 {
4997         list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
4998         return 0;
4999 }
5000
5001 int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
5002 {
5003         struct btrfs_root *reloc_root;
5004         struct btrfs_trans_handle *trans;
5005         struct btrfs_key location;
5006         int found;
5007         int ret;
5008
5009         mutex_lock(&root->fs_info->tree_reloc_mutex);
5010         ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
5011         BUG_ON(ret);
5012         found = !list_empty(&root->fs_info->dead_reloc_roots);
5013         mutex_unlock(&root->fs_info->tree_reloc_mutex);
5014
5015         if (found) {
5016                 trans = btrfs_start_transaction(root, 1);
5017                 BUG_ON(!trans);
5018                 ret = btrfs_commit_transaction(trans, root);
5019                 BUG_ON(ret);
5020         }
5021
5022         location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
5023         location.offset = (u64)-1;
5024         location.type = BTRFS_ROOT_ITEM_KEY;
5025
5026         reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
5027         BUG_ON(!reloc_root);
5028         btrfs_orphan_cleanup(reloc_root);
5029         return 0;
5030 }
5031
5032 static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
5033                                     struct btrfs_root *root)
5034 {
5035         struct btrfs_root *reloc_root;
5036         struct extent_buffer *eb;
5037         struct btrfs_root_item *root_item;
5038         struct btrfs_key root_key;
5039         int ret;
5040
5041         BUG_ON(!root->ref_cows);
5042         if (root->reloc_root)
5043                 return 0;
5044
5045         root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
5046         BUG_ON(!root_item);
5047
5048         ret = btrfs_copy_root(trans, root, root->commit_root,
5049                               &eb, BTRFS_TREE_RELOC_OBJECTID);
5050         BUG_ON(ret);
5051
5052         root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
5053         root_key.offset = root->root_key.objectid;
5054         root_key.type = BTRFS_ROOT_ITEM_KEY;
5055
5056         memcpy(root_item, &root->root_item, sizeof(root_item));
5057         btrfs_set_root_refs(root_item, 0);
5058         btrfs_set_root_bytenr(root_item, eb->start);
5059         btrfs_set_root_level(root_item, btrfs_header_level(eb));
5060         btrfs_set_root_generation(root_item, trans->transid);
5061
5062         btrfs_tree_unlock(eb);
5063         free_extent_buffer(eb);
5064
5065         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
5066                                 &root_key, root_item);
5067         BUG_ON(ret);
5068         kfree(root_item);
5069
5070         reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
5071                                                  &root_key);
5072         BUG_ON(!reloc_root);
5073         reloc_root->last_trans = trans->transid;
5074         reloc_root->commit_root = NULL;
5075         reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
5076
5077         root->reloc_root = reloc_root;
5078         return 0;
5079 }
5080
5081 /*
5082  * Core function of space balance.
5083  *
5084  * The idea is using reloc trees to relocate tree blocks in reference
5085  * counted roots. There is one reloc tree for each subvol, and all
5086  * reloc trees share same root key objectid. Reloc trees are snapshots
5087  * of the latest committed roots of subvols (root->commit_root).
5088  *
5089  * To relocate a tree block referenced by a subvol, there are two steps.
5090  * COW the block through subvol's reloc tree, then update block pointer
5091  * in the subvol to point to the new block. Since all reloc trees share
5092  * same root key objectid, doing special handing for tree blocks owned
5093  * by them is easy. Once a tree block has been COWed in one reloc tree,
5094  * we can use the resulting new block directly when the same block is
5095  * required to COW again through other reloc trees. By this way, relocated
5096  * tree blocks are shared between reloc trees, so they are also shared
5097  * between subvols.
5098  */
5099 static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
5100                                       struct btrfs_root *root,
5101                                       struct btrfs_path *path,
5102                                       struct btrfs_key *first_key,
5103                                       struct btrfs_ref_path *ref_path,
5104                                       struct btrfs_block_group_cache *group,
5105                                       struct inode *reloc_inode)
5106 {
5107         struct btrfs_root *reloc_root;
5108         struct extent_buffer *eb = NULL;
5109         struct btrfs_key *keys;
5110         u64 *nodes;
5111         int level;
5112         int shared_level;
5113         int lowest_level = 0;
5114         int ret;
5115
5116         if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
5117                 lowest_level = ref_path->owner_objectid;
5118
5119         if (!root->ref_cows) {
5120                 path->lowest_level = lowest_level;
5121                 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
5122                 BUG_ON(ret < 0);
5123                 path->lowest_level = 0;
5124                 btrfs_release_path(root, path);
5125                 return 0;
5126         }
5127
5128         mutex_lock(&root->fs_info->tree_reloc_mutex);
5129         ret = init_reloc_tree(trans, root);
5130         BUG_ON(ret);
5131         reloc_root = root->reloc_root;
5132
5133         shared_level = ref_path->shared_level;
5134         ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
5135
5136         keys = ref_path->node_keys;
5137         nodes = ref_path->new_nodes;
5138         memset(&keys[shared_level + 1], 0,
5139                sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
5140         memset(&nodes[shared_level + 1], 0,
5141                sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
5142
5143         if (nodes[lowest_level] == 0) {
5144                 path->lowest_level = lowest_level;
5145                 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
5146                                         0, 1);
5147                 BUG_ON(ret);
5148                 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
5149                         eb = path->nodes[level];
5150                         if (!eb || eb == reloc_root->node)
5151                                 break;
5152                         nodes[level] = eb->start;
5153                         if (level == 0)
5154                                 btrfs_item_key_to_cpu(eb, &keys[level], 0);
5155                         else
5156                                 btrfs_node_key_to_cpu(eb, &keys[level], 0);
5157                 }
5158                 if (nodes[0] &&
5159                     ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5160                         eb = path->nodes[0];
5161                         ret = replace_extents_in_leaf(trans, reloc_root, eb,
5162                                                       group, reloc_inode);
5163                         BUG_ON(ret);
5164                 }
5165                 btrfs_release_path(reloc_root, path);
5166         } else {
5167                 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
5168                                        lowest_level);
5169                 BUG_ON(ret);
5170         }
5171
5172         /*
5173          * replace tree blocks in the fs tree with tree blocks in
5174          * the reloc tree.
5175          */
5176         ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
5177         BUG_ON(ret < 0);
5178
5179         if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5180                 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
5181                                         0, 0);
5182                 BUG_ON(ret);
5183                 extent_buffer_get(path->nodes[0]);
5184                 eb = path->nodes[0];
5185                 btrfs_release_path(reloc_root, path);
5186                 ret = invalidate_extent_cache(reloc_root, eb, group, root);
5187                 BUG_ON(ret);
5188                 free_extent_buffer(eb);
5189         }
5190
5191         mutex_unlock(&root->fs_info->tree_reloc_mutex);
5192         path->lowest_level = 0;
5193         return 0;
5194 }
5195
5196 static int noinline relocate_tree_block(struct btrfs_trans_handle *trans,
5197                                         struct btrfs_root *root,
5198                                         struct btrfs_path *path,
5199                                         struct btrfs_key *first_key,
5200                                         struct btrfs_ref_path *ref_path)
5201 {
5202         int ret;
5203
5204         ret = relocate_one_path(trans, root, path, first_key,
5205                                 ref_path, NULL, NULL);
5206         BUG_ON(ret);
5207
5208         if (root == root->fs_info->extent_root)
5209                 btrfs_extent_post_op(trans, root);
5210
5211         return 0;
5212 }
5213
5214 static int noinline del_extent_zero(struct btrfs_trans_handle *trans,
5215                                     struct btrfs_root *extent_root,
5216                                     struct btrfs_path *path,
5217                                     struct btrfs_key *extent_key)
5218 {
5219         int ret;
5220
5221         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
5222         if (ret)
5223                 goto out;
5224         ret = btrfs_del_item(trans, extent_root, path);
5225 out:
5226         btrfs_release_path(extent_root, path);
5227         return ret;
5228 }
5229
5230 static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info,
5231                                                 struct btrfs_ref_path *ref_path)
5232 {
5233         struct btrfs_key root_key;
5234
5235         root_key.objectid = ref_path->root_objectid;
5236         root_key.type = BTRFS_ROOT_ITEM_KEY;
5237         if (is_cowonly_root(ref_path->root_objectid))
5238                 root_key.offset = 0;
5239         else
5240                 root_key.offset = (u64)-1;
5241
5242         return btrfs_read_fs_root_no_name(fs_info, &root_key);
5243 }
5244
5245 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
5246                                         struct btrfs_path *path,
5247                                         struct btrfs_key *extent_key,
5248                                         struct btrfs_block_group_cache *group,
5249                                         struct inode *reloc_inode, int pass)
5250 {
5251         struct btrfs_trans_handle *trans;
5252         struct btrfs_root *found_root;
5253         struct btrfs_ref_path *ref_path = NULL;
5254         struct disk_extent *new_extents = NULL;
5255         int nr_extents = 0;
5256         int loops;
5257         int ret;
5258         int level;
5259         struct btrfs_key first_key;
5260         u64 prev_block = 0;
5261
5262
5263         trans = btrfs_start_transaction(extent_root, 1);
5264         BUG_ON(!trans);
5265
5266         if (extent_key->objectid == 0) {
5267                 ret = del_extent_zero(trans, extent_root, path, extent_key);
5268                 goto out;
5269         }
5270
5271         ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
5272         if (!ref_path) {
5273                ret = -ENOMEM;
5274                goto out;
5275         }
5276
5277         for (loops = 0; ; loops++) {
5278                 if (loops == 0) {
5279                         ret = btrfs_first_ref_path(trans, extent_root, ref_path,
5280                                                    extent_key->objectid);
5281                 } else {
5282                         ret = btrfs_next_ref_path(trans, extent_root, ref_path);
5283                 }
5284                 if (ret < 0)
5285                         goto out;
5286                 if (ret > 0)
5287                         break;
5288
5289                 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
5290                     ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
5291                         continue;
5292
5293                 found_root = read_ref_root(extent_root->fs_info, ref_path);
5294                 BUG_ON(!found_root);
5295                 /*
5296                  * for reference counted tree, only process reference paths
5297                  * rooted at the latest committed root.
5298                  */
5299                 if (found_root->ref_cows &&
5300                     ref_path->root_generation != found_root->root_key.offset)
5301                         continue;
5302
5303                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5304                         if (pass == 0) {
5305                                 /*
5306                                  * copy data extents to new locations
5307                                  */
5308                                 u64 group_start = group->key.objectid;
5309                                 ret = relocate_data_extent(reloc_inode,
5310                                                            extent_key,
5311                                                            group_start);
5312                                 if (ret < 0)
5313                                         goto out;
5314                                 break;
5315                         }
5316                         level = 0;
5317                 } else {
5318                         level = ref_path->owner_objectid;
5319                 }
5320
5321                 if (prev_block != ref_path->nodes[level]) {
5322                         struct extent_buffer *eb;
5323                         u64 block_start = ref_path->nodes[level];
5324                         u64 block_size = btrfs_level_size(found_root, level);
5325
5326                         eb = read_tree_block(found_root, block_start,
5327                                              block_size, 0);
5328                         btrfs_tree_lock(eb);
5329                         BUG_ON(level != btrfs_header_level(eb));
5330
5331                         if (level == 0)
5332                                 btrfs_item_key_to_cpu(eb, &first_key, 0);
5333                         else
5334                                 btrfs_node_key_to_cpu(eb, &first_key, 0);
5335
5336                         btrfs_tree_unlock(eb);
5337                         free_extent_buffer(eb);
5338                         prev_block = block_start;
5339                 }
5340
5341                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
5342                     pass >= 2) {
5343                         /*
5344                          * use fallback method to process the remaining
5345                          * references.
5346                          */
5347                         if (!new_extents) {
5348                                 u64 group_start = group->key.objectid;
5349                                 new_extents = kmalloc(sizeof(*new_extents),
5350                                                       GFP_NOFS);
5351                                 nr_extents = 1;
5352                                 ret = get_new_locations(reloc_inode,
5353                                                         extent_key,
5354                                                         group_start, 1,
5355                                                         &new_extents,
5356                                                         &nr_extents);
5357                                 if (ret)
5358                                         goto out;
5359                         }
5360                         btrfs_record_root_in_trans(found_root);
5361                         ret = replace_one_extent(trans, found_root,
5362                                                 path, extent_key,
5363                                                 &first_key, ref_path,
5364                                                 new_extents, nr_extents);
5365                         if (ret < 0)
5366                                 goto out;
5367                         continue;
5368                 }
5369
5370                 btrfs_record_root_in_trans(found_root);
5371                 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
5372                         ret = relocate_tree_block(trans, found_root, path,
5373                                                   &first_key, ref_path);
5374                 } else {
5375                         /*
5376                          * try to update data extent references while
5377                          * keeping metadata shared between snapshots.
5378                          */
5379                         ret = relocate_one_path(trans, found_root, path,
5380                                                 &first_key, ref_path,
5381                                                 group, reloc_inode);
5382                 }
5383                 if (ret < 0)
5384                         goto out;
5385         }
5386         ret = 0;
5387 out:
5388         btrfs_end_transaction(trans, extent_root);
5389         kfree(new_extents);
5390         kfree(ref_path);
5391         return ret;
5392 }
5393
5394 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
5395 {
5396         u64 num_devices;
5397         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
5398                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
5399
5400         num_devices = root->fs_info->fs_devices->rw_devices;
5401         if (num_devices == 1) {
5402                 stripped |= BTRFS_BLOCK_GROUP_DUP;
5403                 stripped = flags & ~stripped;
5404
5405                 /* turn raid0 into single device chunks */
5406                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
5407                         return stripped;
5408
5409                 /* turn mirroring into duplication */
5410                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
5411                              BTRFS_BLOCK_GROUP_RAID10))
5412                         return stripped | BTRFS_BLOCK_GROUP_DUP;
5413                 return flags;
5414         } else {
5415                 /* they already had raid on here, just return */
5416                 if (flags & stripped)
5417                         return flags;
5418
5419                 stripped |= BTRFS_BLOCK_GROUP_DUP;
5420                 stripped = flags & ~stripped;
5421
5422                 /* switch duplicated blocks with raid1 */
5423                 if (flags & BTRFS_BLOCK_GROUP_DUP)
5424                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
5425
5426                 /* turn single device chunks into raid0 */
5427                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
5428         }
5429         return flags;
5430 }
5431
5432 int __alloc_chunk_for_shrink(struct btrfs_root *root,
5433                      struct btrfs_block_group_cache *shrink_block_group,
5434                      int force)
5435 {
5436         struct btrfs_trans_handle *trans;
5437         u64 new_alloc_flags;
5438         u64 calc;
5439
5440         spin_lock(&shrink_block_group->lock);
5441         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
5442                 spin_unlock(&shrink_block_group->lock);
5443
5444                 trans = btrfs_start_transaction(root, 1);
5445                 spin_lock(&shrink_block_group->lock);
5446
5447                 new_alloc_flags = update_block_group_flags(root,
5448                                                    shrink_block_group->flags);
5449                 if (new_alloc_flags != shrink_block_group->flags) {
5450                         calc =
5451                              btrfs_block_group_used(&shrink_block_group->item);
5452                 } else {
5453                         calc = shrink_block_group->key.offset;
5454                 }
5455                 spin_unlock(&shrink_block_group->lock);
5456
5457                 do_chunk_alloc(trans, root->fs_info->extent_root,
5458                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
5459
5460                 btrfs_end_transaction(trans, root);
5461         } else
5462                 spin_unlock(&shrink_block_group->lock);
5463         return 0;
5464 }
5465
5466 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
5467                                  struct btrfs_root *root,
5468                                  u64 objectid, u64 size)
5469 {
5470         struct btrfs_path *path;
5471         struct btrfs_inode_item *item;
5472         struct extent_buffer *leaf;
5473         int ret;
5474
5475         path = btrfs_alloc_path();
5476         if (!path)
5477                 return -ENOMEM;
5478
5479         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
5480         if (ret)
5481                 goto out;
5482
5483         leaf = path->nodes[0];
5484         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
5485         memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
5486         btrfs_set_inode_generation(leaf, item, 1);
5487         btrfs_set_inode_size(leaf, item, size);
5488         btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
5489         btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM |
5490                                           BTRFS_INODE_NOCOMPRESS);
5491         btrfs_mark_buffer_dirty(leaf);
5492         btrfs_release_path(root, path);
5493 out:
5494         btrfs_free_path(path);
5495         return ret;
5496 }
5497
5498 static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
5499                                         struct btrfs_block_group_cache *group)
5500 {
5501         struct inode *inode = NULL;
5502         struct btrfs_trans_handle *trans;
5503         struct btrfs_root *root;
5504         struct btrfs_key root_key;
5505         u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
5506         int err = 0;
5507
5508         root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
5509         root_key.type = BTRFS_ROOT_ITEM_KEY;
5510         root_key.offset = (u64)-1;
5511         root = btrfs_read_fs_root_no_name(fs_info, &root_key);
5512         if (IS_ERR(root))
5513                 return ERR_CAST(root);
5514
5515         trans = btrfs_start_transaction(root, 1);
5516         BUG_ON(!trans);
5517
5518         err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
5519         if (err)
5520                 goto out;
5521
5522         err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
5523         BUG_ON(err);
5524
5525         err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
5526                                        group->key.offset, 0, group->key.offset,
5527                                        0, 0, 0);
5528         BUG_ON(err);
5529
5530         inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
5531         if (inode->i_state & I_NEW) {
5532                 BTRFS_I(inode)->root = root;
5533                 BTRFS_I(inode)->location.objectid = objectid;
5534                 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
5535                 BTRFS_I(inode)->location.offset = 0;
5536                 btrfs_read_locked_inode(inode);
5537                 unlock_new_inode(inode);
5538                 BUG_ON(is_bad_inode(inode));
5539         } else {
5540                 BUG_ON(1);
5541         }
5542
5543         err = btrfs_orphan_add(trans, inode);
5544 out:
5545         btrfs_end_transaction(trans, root);
5546         if (err) {
5547                 if (inode)
5548                         iput(inode);
5549                 inode = ERR_PTR(err);
5550         }
5551         return inode;
5552 }
5553
5554 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
5555 {
5556         struct btrfs_trans_handle *trans;
5557         struct btrfs_path *path;
5558         struct btrfs_fs_info *info = root->fs_info;
5559         struct extent_buffer *leaf;
5560         struct inode *reloc_inode;
5561         struct btrfs_block_group_cache *block_group;
5562         struct btrfs_key key;
5563         u64 skipped;
5564         u64 cur_byte;
5565         u64 total_found;
5566         u32 nritems;
5567         int ret;
5568         int progress;
5569         int pass = 0;
5570
5571         root = root->fs_info->extent_root;
5572
5573         block_group = btrfs_lookup_block_group(info, group_start);
5574         BUG_ON(!block_group);
5575
5576         printk("btrfs relocating block group %llu flags %llu\n",
5577                (unsigned long long)block_group->key.objectid,
5578                (unsigned long long)block_group->flags);
5579
5580         path = btrfs_alloc_path();
5581         BUG_ON(!path);
5582
5583         reloc_inode = create_reloc_inode(info, block_group);
5584         BUG_ON(IS_ERR(reloc_inode));
5585
5586         __alloc_chunk_for_shrink(root, block_group, 1);
5587         set_block_group_readonly(block_group);
5588
5589         btrfs_start_delalloc_inodes(info->tree_root);
5590         btrfs_wait_ordered_extents(info->tree_root, 0);
5591 again:
5592         skipped = 0;
5593         total_found = 0;
5594         progress = 0;
5595         key.objectid = block_group->key.objectid;
5596         key.offset = 0;
5597         key.type = 0;
5598         cur_byte = key.objectid;
5599
5600         trans = btrfs_start_transaction(info->tree_root, 1);
5601         btrfs_commit_transaction(trans, info->tree_root);
5602
5603         mutex_lock(&root->fs_info->cleaner_mutex);
5604         btrfs_clean_old_snapshots(info->tree_root);
5605         btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
5606         mutex_unlock(&root->fs_info->cleaner_mutex);
5607
5608         while(1) {
5609                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5610                 if (ret < 0)
5611                         goto out;
5612 next:
5613                 leaf = path->nodes[0];
5614                 nritems = btrfs_header_nritems(leaf);
5615                 if (path->slots[0] >= nritems) {
5616                         ret = btrfs_next_leaf(root, path);
5617                         if (ret < 0)
5618                                 goto out;
5619                         if (ret == 1) {
5620                                 ret = 0;
5621                                 break;
5622                         }
5623                         leaf = path->nodes[0];
5624                         nritems = btrfs_header_nritems(leaf);
5625                 }
5626
5627                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5628
5629                 if (key.objectid >= block_group->key.objectid +
5630                     block_group->key.offset)
5631                         break;
5632
5633                 if (progress && need_resched()) {
5634                         btrfs_release_path(root, path);
5635                         cond_resched();
5636                         progress = 0;
5637                         continue;
5638                 }
5639                 progress = 1;
5640
5641                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
5642                     key.objectid + key.offset <= cur_byte) {
5643                         path->slots[0]++;
5644                         goto next;
5645                 }
5646
5647                 total_found++;
5648                 cur_byte = key.objectid + key.offset;
5649                 btrfs_release_path(root, path);
5650
5651                 __alloc_chunk_for_shrink(root, block_group, 0);
5652                 ret = relocate_one_extent(root, path, &key, block_group,
5653                                           reloc_inode, pass);
5654                 BUG_ON(ret < 0);
5655                 if (ret > 0)
5656                         skipped++;
5657
5658                 key.objectid = cur_byte;
5659                 key.type = 0;
5660                 key.offset = 0;
5661         }
5662
5663         btrfs_release_path(root, path);
5664
5665         if (pass == 0) {
5666                 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
5667                 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
5668                 WARN_ON(reloc_inode->i_mapping->nrpages);
5669         }
5670
5671         if (total_found > 0) {
5672                 printk("btrfs found %llu extents in pass %d\n",
5673                        (unsigned long long)total_found, pass);
5674                 pass++;
5675                 if (total_found == skipped && pass > 2) {
5676                         iput(reloc_inode);
5677                         reloc_inode = create_reloc_inode(info, block_group);
5678                         pass = 0;
5679                 }
5680                 goto again;
5681         }
5682
5683         /* delete reloc_inode */
5684         iput(reloc_inode);
5685
5686         /* unpin extents in this range */
5687         trans = btrfs_start_transaction(info->tree_root, 1);
5688         btrfs_commit_transaction(trans, info->tree_root);
5689
5690         spin_lock(&block_group->lock);
5691         WARN_ON(block_group->pinned > 0);
5692         WARN_ON(block_group->reserved > 0);
5693         WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
5694         spin_unlock(&block_group->lock);
5695         ret = 0;
5696 out:
5697         btrfs_free_path(path);
5698         return ret;
5699 }
5700
5701 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
5702                            struct btrfs_key *key)
5703 {
5704         int ret = 0;
5705         struct btrfs_key found_key;
5706         struct extent_buffer *leaf;
5707         int slot;
5708
5709         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
5710         if (ret < 0)
5711                 goto out;
5712
5713         while(1) {
5714                 slot = path->slots[0];
5715                 leaf = path->nodes[0];
5716                 if (slot >= btrfs_header_nritems(leaf)) {
5717                         ret = btrfs_next_leaf(root, path);
5718                         if (ret == 0)
5719                                 continue;
5720                         if (ret < 0)
5721                                 goto out;
5722                         break;
5723                 }
5724                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5725
5726                 if (found_key.objectid >= key->objectid &&
5727                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5728                         ret = 0;
5729                         goto out;
5730                 }
5731                 path->slots[0]++;
5732         }
5733         ret = -ENOENT;
5734 out:
5735         return ret;
5736 }
5737
5738 int btrfs_free_block_groups(struct btrfs_fs_info *info)
5739 {
5740         struct btrfs_block_group_cache *block_group;
5741         struct rb_node *n;
5742
5743         spin_lock(&info->block_group_cache_lock);
5744         while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
5745                 block_group = rb_entry(n, struct btrfs_block_group_cache,
5746                                        cache_node);
5747                 rb_erase(&block_group->cache_node,
5748                          &info->block_group_cache_tree);
5749                 spin_unlock(&info->block_group_cache_lock);
5750
5751                 btrfs_remove_free_space_cache(block_group);
5752                 down_write(&block_group->space_info->groups_sem);
5753                 list_del(&block_group->list);
5754                 up_write(&block_group->space_info->groups_sem);
5755                 kfree(block_group);
5756
5757                 spin_lock(&info->block_group_cache_lock);
5758         }
5759         spin_unlock(&info->block_group_cache_lock);
5760         return 0;
5761 }
5762
5763 int btrfs_read_block_groups(struct btrfs_root *root)
5764 {
5765         struct btrfs_path *path;
5766         int ret;
5767         struct btrfs_block_group_cache *cache;
5768         struct btrfs_fs_info *info = root->fs_info;
5769         struct btrfs_space_info *space_info;
5770         struct btrfs_key key;
5771         struct btrfs_key found_key;
5772         struct extent_buffer *leaf;
5773
5774         root = info->extent_root;
5775         key.objectid = 0;
5776         key.offset = 0;
5777         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5778         path = btrfs_alloc_path();
5779         if (!path)
5780                 return -ENOMEM;
5781
5782         while(1) {
5783                 ret = find_first_block_group(root, path, &key);
5784                 if (ret > 0) {
5785                         ret = 0;
5786                         goto error;
5787                 }
5788                 if (ret != 0)
5789                         goto error;
5790
5791                 leaf = path->nodes[0];
5792                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5793                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
5794                 if (!cache) {
5795                         ret = -ENOMEM;
5796                         break;
5797                 }
5798
5799                 spin_lock_init(&cache->lock);
5800                 mutex_init(&cache->alloc_mutex);
5801                 INIT_LIST_HEAD(&cache->list);
5802                 read_extent_buffer(leaf, &cache->item,
5803                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
5804                                    sizeof(cache->item));
5805                 memcpy(&cache->key, &found_key, sizeof(found_key));
5806
5807                 key.objectid = found_key.objectid + found_key.offset;
5808                 btrfs_release_path(root, path);
5809                 cache->flags = btrfs_block_group_flags(&cache->item);
5810
5811                 ret = update_space_info(info, cache->flags, found_key.offset,
5812                                         btrfs_block_group_used(&cache->item),
5813                                         &space_info);
5814                 BUG_ON(ret);
5815                 cache->space_info = space_info;
5816                 down_write(&space_info->groups_sem);
5817                 list_add_tail(&cache->list, &space_info->block_groups);
5818                 up_write(&space_info->groups_sem);
5819
5820                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
5821                 BUG_ON(ret);
5822
5823                 set_avail_alloc_bits(root->fs_info, cache->flags);
5824                 if (btrfs_chunk_readonly(root, cache->key.objectid))
5825                         set_block_group_readonly(cache);
5826         }
5827         ret = 0;
5828 error:
5829         btrfs_free_path(path);
5830         return ret;
5831 }
5832
5833 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
5834                            struct btrfs_root *root, u64 bytes_used,
5835                            u64 type, u64 chunk_objectid, u64 chunk_offset,
5836                            u64 size)
5837 {
5838         int ret;
5839         struct btrfs_root *extent_root;
5840         struct btrfs_block_group_cache *cache;
5841
5842         extent_root = root->fs_info->extent_root;
5843
5844         root->fs_info->last_trans_new_blockgroup = trans->transid;
5845
5846         cache = kzalloc(sizeof(*cache), GFP_NOFS);
5847         if (!cache)
5848                 return -ENOMEM;
5849
5850         cache->key.objectid = chunk_offset;
5851         cache->key.offset = size;
5852         spin_lock_init(&cache->lock);
5853         mutex_init(&cache->alloc_mutex);
5854         INIT_LIST_HEAD(&cache->list);
5855         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5856
5857         btrfs_set_block_group_used(&cache->item, bytes_used);
5858         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
5859         cache->flags = type;
5860         btrfs_set_block_group_flags(&cache->item, type);
5861
5862         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
5863                                 &cache->space_info);
5864         BUG_ON(ret);
5865         down_write(&cache->space_info->groups_sem);
5866         list_add_tail(&cache->list, &cache->space_info->block_groups);
5867         up_write(&cache->space_info->groups_sem);
5868
5869         ret = btrfs_add_block_group_cache(root->fs_info, cache);
5870         BUG_ON(ret);
5871
5872         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
5873                                 sizeof(cache->item));
5874         BUG_ON(ret);
5875
5876         finish_current_insert(trans, extent_root, 0);
5877         ret = del_pending_extents(trans, extent_root, 0);
5878         BUG_ON(ret);
5879         set_avail_alloc_bits(extent_root->fs_info, type);
5880
5881         return 0;
5882 }
5883
5884 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5885                              struct btrfs_root *root, u64 group_start)
5886 {
5887         struct btrfs_path *path;
5888         struct btrfs_block_group_cache *block_group;
5889         struct btrfs_key key;
5890         int ret;
5891
5892         root = root->fs_info->extent_root;
5893
5894         block_group = btrfs_lookup_block_group(root->fs_info, group_start);
5895         BUG_ON(!block_group);
5896         BUG_ON(!block_group->ro);
5897
5898         memcpy(&key, &block_group->key, sizeof(key));
5899
5900         path = btrfs_alloc_path();
5901         BUG_ON(!path);
5902
5903         btrfs_remove_free_space_cache(block_group);
5904         rb_erase(&block_group->cache_node,
5905                  &root->fs_info->block_group_cache_tree);
5906         down_write(&block_group->space_info->groups_sem);
5907         list_del(&block_group->list);
5908         up_write(&block_group->space_info->groups_sem);
5909
5910         spin_lock(&block_group->space_info->lock);
5911         block_group->space_info->total_bytes -= block_group->key.offset;
5912         block_group->space_info->bytes_readonly -= block_group->key.offset;
5913         spin_unlock(&block_group->space_info->lock);
5914         block_group->space_info->full = 0;
5915
5916         /*
5917         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
5918         kfree(shrink_block_group);
5919         */
5920
5921         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5922         if (ret > 0)
5923                 ret = -EIO;
5924         if (ret < 0)
5925                 goto out;
5926
5927         ret = btrfs_del_item(trans, root, path);
5928 out:
5929         btrfs_free_path(path);
5930         return ret;
5931 }