/*
 * fs/btrfs/extent-tree.c — extent allocation tree routines
 * (Btrfs: compat code fixes)
 */
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include <linux/version.h>
23 #include "compat.h"
24 #include "hash.h"
25 #include "crc32c.h"
26 #include "ctree.h"
27 #include "disk-io.h"
28 #include "print-tree.h"
29 #include "transaction.h"
30 #include "volumes.h"
31 #include "locking.h"
32 #include "ref-cache.h"
33 #include "compat.h"
34
/* operation types for struct pending_extent_op, stored in ->type */
#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2

/*
 * A deferred extent-tree modification.  These are queued on lists and
 * processed in batches (see update_backrefs()/insert_extents() and the
 * finish_current_insert()/del_pending_extents() helpers declared below).
 */
struct pending_extent_op {
	/* one of the PENDING_* values above */
	int type;
	/* start byte and length of the extent this op applies to */
	u64 bytenr;
	u64 num_bytes;
	/* new and original parent block of the backref */
	u64 parent;
	u64 orig_parent;
	/* new and original transaction generation of the backref */
	u64 generation;
	u64 orig_generation;
	/* tree level; written as the backref owner objectid for tree blocks */
	int level;
	/* links this op into the pending list being processed */
	struct list_head list;
	/* per-op bookkeeping; counts items inserted in insert_extents() */
	int del;
};
51
52 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
53                                  btrfs_root *extent_root, int all);
54 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
55                                btrfs_root *extent_root, int all);
56 static struct btrfs_block_group_cache *
57 __btrfs_find_block_group(struct btrfs_root *root,
58                          struct btrfs_block_group_cache *hint,
59                          u64 search_start, int data, int owner);
60 static int pin_down_bytes(struct btrfs_trans_handle *trans,
61                           struct btrfs_root *root,
62                           u64 bytenr, u64 num_bytes, int is_data);
63 static int update_block_group(struct btrfs_trans_handle *trans,
64                               struct btrfs_root *root,
65                               u64 bytenr, u64 num_bytes, int alloc,
66                               int mark_free);
67
68 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
69 {
70         return (cache->flags & bits) == bits;
71 }
72
73 /*
74  * this adds the block group to the fs_info rb tree for the block group
75  * cache
76  */
77 int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
78                                 struct btrfs_block_group_cache *block_group)
79 {
80         struct rb_node **p;
81         struct rb_node *parent = NULL;
82         struct btrfs_block_group_cache *cache;
83
84         spin_lock(&info->block_group_cache_lock);
85         p = &info->block_group_cache_tree.rb_node;
86
87         while (*p) {
88                 parent = *p;
89                 cache = rb_entry(parent, struct btrfs_block_group_cache,
90                                  cache_node);
91                 if (block_group->key.objectid < cache->key.objectid) {
92                         p = &(*p)->rb_left;
93                 } else if (block_group->key.objectid > cache->key.objectid) {
94                         p = &(*p)->rb_right;
95                 } else {
96                         spin_unlock(&info->block_group_cache_lock);
97                         return -EEXIST;
98                 }
99         }
100
101         rb_link_node(&block_group->cache_node, parent, p);
102         rb_insert_color(&block_group->cache_node,
103                         &info->block_group_cache_tree);
104         spin_unlock(&info->block_group_cache_lock);
105
106         return 0;
107 }
108
/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr.  Returns NULL
 * when no matching group exists.
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		/* last byte covered by this block group */
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			/*
			 * in "at or after" mode, remember the group with the
			 * lowest start offset that is still after bytenr
			 */
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			/* bytenr falls inside this group's byte range */
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			/* exact match on the group's start offset */
			ret = cache;
			break;
		}
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}
149
/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 *
 * Adds the pieces of [start, end) that are not pinned to the block
 * group's free space cache.  Always returns 0.
 */
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size;
	int ret;

	mutex_lock(&info->pinned_mutex);
	while (start < end) {
		/* find the next pinned range at or after start */
		ret = find_first_extent_bit(&info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		if (extent_start == start) {
			/* range is pinned right from start, skip past it */
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			/* free gap before the next pinned range */
			size = extent_start - start;
			ret = btrfs_add_free_space_lock(block_group, start,
							size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			/* next pinned range begins at or beyond end */
			break;
		}
	}

	/* whatever remains has no pinned extents in it */
	if (start < end) {
		size = end - start;
		ret = btrfs_add_free_space_lock(block_group, start, size);
		BUG_ON(ret);
	}
	mutex_unlock(&info->pinned_mutex);

	return 0;
}
191
/*
 * Populate @block_group's free space cache by walking the extent tree:
 * every gap between EXTENT_ITEMs inside the group's byte range is free
 * space (pinned extents are filtered out by add_new_free_space()).
 * A no-op if the group is NULL or already cached.
 */
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last = 0;
	u64 first_free;
	int found = 0;

	if (!block_group)
		return 0;

	/* extent items live in the extent tree, not the caller's root */
	root = root->fs_info->extent_root;

	if (block_group->cached)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	/*
	 * we get into deadlocks with paths held by callers of this function.
	 * since the alloc_mutex is protecting things right now, just
	 * skip the locking here
	 */
	path->skip_locking = 1;
	/* never treat the superblock area as free space */
	first_free = max_t(u64, block_group->key.objectid,
			   BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
	key.objectid = block_group->key.objectid;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;
	/*
	 * step back to the previous extent item: it may extend into this
	 * block group, in which case the free space starts after its end
	 */
	ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
	if (ret < 0)
		goto err;
	if (ret == 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid + key.offset > first_free)
			first_free = key.objectid + key.offset;
	}
	while(1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			if (ret == 0)
				continue;
			else
				break;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		/* done once we walk past the end of the block group */
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
				last = first_free;
				found = 1;
			}

			/* the gap [last, key.objectid) is free space */
			add_new_free_space(block_group, root->fs_info, last,
					   key.objectid);

			last = key.objectid + key.offset;
		}
next:
		path->slots[0]++;
	}

	if (!found)
		last = first_free;

	/* free space from the last extent to the end of the group */
	add_new_free_space(block_group, root->fs_info, last,
			   block_group->key.objectid +
			   block_group->key.offset);

	block_group->cached = 1;
	ret = 0;
err:
	btrfs_free_path(path);
	return ret;
}
288
289 /*
290  * return the block group that starts at or after bytenr
291  */
292 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
293                                                        btrfs_fs_info *info,
294                                                          u64 bytenr)
295 {
296         struct btrfs_block_group_cache *cache;
297
298         cache = block_group_cache_tree_search(info, bytenr, 0);
299
300         return cache;
301 }
302
303 /*
304  * return the block group that contains teh given bytenr
305  */
306 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
307                                                          btrfs_fs_info *info,
308                                                          u64 bytenr)
309 {
310         struct btrfs_block_group_cache *cache;
311
312         cache = block_group_cache_tree_search(info, bytenr, 1);
313
314         return cache;
315 }
316
317 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
318                                                   u64 flags)
319 {
320         struct list_head *head = &info->space_info;
321         struct list_head *cur;
322         struct btrfs_space_info *found;
323         list_for_each(cur, head) {
324                 found = list_entry(cur, struct btrfs_space_info, list);
325                 if (found->flags == flags)
326                         return found;
327         }
328         return NULL;
329 }
330
331 static u64 div_factor(u64 num, int factor)
332 {
333         if (factor == 10)
334                 return num;
335         num *= factor;
336         do_div(num, 10);
337         return num;
338 }
339
/*
 * Find a block group matching the allocation bits in @data with room to
 * spare: a group qualifies when used + pinned + reserved is below
 * factor/10 of its size.  Tries the group at @search_start, then @hint,
 * then scans forward through all groups, wrapping back to search_start
 * once, and finally retries with the fullness factor relaxed to 10.
 * Returns NULL if nothing qualifies.  @owner is not referenced here.
 */
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
			 struct btrfs_block_group_cache *hint,
			 u64 search_start, int data, int owner)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
	u64 last = 0;
	u64 free_check;
	int full_search = 0;
	int factor = 10;
	int wrapped = 0;

	/* be pickier (90% full cutoff) for metadata groups at first */
	if (data & BTRFS_BLOCK_GROUP_METADATA)
		factor = 9;

	if (search_start) {
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_first_block_group(info, search_start);
		if (shint && block_group_bits(shint, data)) {
			spin_lock(&shint->lock);
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned + shint->reserved <
			    div_factor(shint->key.offset, factor)) {
				spin_unlock(&shint->lock);
				return shint;
			}
			spin_unlock(&shint->lock);
		}
	}
	if (hint && block_group_bits(hint, data)) {
		spin_lock(&hint->lock);
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned + hint->reserved <
		    div_factor(hint->key.offset, factor)) {
			spin_unlock(&hint->lock);
			return hint;
		}
		spin_unlock(&hint->lock);
		/* start scanning just past the hint group */
		last = hint->key.objectid + hint->key.offset;
	} else {
		if (hint)
			last = max(hint->key.objectid, search_start);
		else
			last = search_start;
	}
again:
	/* walk all block groups from last onward */
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (block_group_bits(cache, data)) {
			free_check = div_factor(cache->key.offset, factor);
			if (used + cache->pinned + cache->reserved <
			    free_check) {
				found_group = cache;
				spin_unlock(&cache->lock);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		cond_resched();
	}
	/* first retry: wrap around to the original search start */
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	/* second retry: accept fuller groups (factor 10) */
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return found_group;
}
424
425 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
426                                                  struct btrfs_block_group_cache
427                                                  *hint, u64 search_start,
428                                                  int data, int owner)
429 {
430
431         struct btrfs_block_group_cache *ret;
432         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
433         return ret;
434 }
435
436 /* simple helper to search for an existing extent at a given offset */
437 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
438 {
439         int ret;
440         struct btrfs_key key;
441         struct btrfs_path *path;
442
443         path = btrfs_alloc_path();
444         BUG_ON(!path);
445         key.objectid = start;
446         key.offset = len;
447         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
448         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
449                                 0, 0);
450         btrfs_free_path(path);
451         return ret;
452 }
453
454 /*
455  * Back reference rules.  Back refs have three main goals:
456  *
457  * 1) differentiate between all holders of references to an extent so that
458  *    when a reference is dropped we can make sure it was a valid reference
459  *    before freeing the extent.
460  *
461  * 2) Provide enough information to quickly find the holders of an extent
462  *    if we notice a given block is corrupted or bad.
463  *
464  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
465  *    maintenance.  This is actually the same as #2, but with a slightly
466  *    different use case.
467  *
468  * File extents can be referenced by:
469  *
470  * - multiple snapshots, subvolumes, or different generations in one subvol
471  * - different files inside a single subvolume
472  * - different offsets inside a file (bookend extents in file.c)
473  *
474  * The extent ref structure has fields for:
475  *
476  * - Objectid of the subvolume root
477  * - Generation number of the tree holding the reference
478  * - objectid of the file holding the reference
 * - number of references held by the parent node (always 1 for tree blocks)
480  *
481  * Btree leaf may hold multiple references to a file extent. In most cases,
482  * these references are from same file and the corresponding offsets inside
483  * the file are close together.
484  *
485  * When a file extent is allocated the fields are filled in:
486  *     (root_key.objectid, trans->transid, inode objectid, 1)
487  *
488  * When a leaf is cow'd new references are added for every file extent found
489  * in the leaf.  It looks similar to the create case, but trans->transid will
490  * be different when the block is cow'd.
491  *
492  *     (root_key.objectid, trans->transid, inode objectid,
493  *      number of references in the leaf)
494  *
495  * When a file extent is removed either during snapshot deletion or
496  * file truncation, we find the corresponding back reference and check
497  * the following fields:
498  *
499  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
500  *      inode objectid)
501  *
502  * Btree extents can be referenced by:
503  *
504  * - Different subvolumes
505  * - Different generations of the same subvolume
506  *
507  * When a tree block is created, back references are inserted:
508  *
509  * (root->root_key.objectid, trans->transid, level, 1)
510  *
511  * When a tree block is cow'd, new back references are added for all the
512  * blocks it points to. If the tree block isn't in reference counted root,
513  * the old back references are removed. These new back references are of
514  * the form (trans->transid will have increased since creation):
515  *
516  * (root->root_key.objectid, trans->transid, level, 1)
517  *
 * When a backref is being deleted, the following fields are checked:
519  *
520  * if backref was for a tree root:
521  *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
522  * else
523  *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
524  *
525  * Back Reference Key composing:
526  *
527  * The key objectid corresponds to the first byte in the extent, the key
528  * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 * byte of the parent extent. If an extent is a tree root, the key offset is set
530  * to the key objectid.
531  */
532
/*
 * Look up the backref item for the extent at @bytenr whose parent block
 * is @parent, and verify its root, generation and owner match what the
 * caller expects.  When @del is set, the search is prepared for a
 * subsequent deletion of the item (ins_len of -1).
 *
 * Returns 0 with the ref at path->slots[0], -ENOENT if no ref item
 * exists at (bytenr, parent), or -EIO if one exists but its fields
 * don't match.
 */
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 ref_root, u64 ref_generation,
					  u64 owner_objectid, int del)
{
	struct btrfs_key key;
	struct btrfs_extent_ref *ref;
	struct extent_buffer *leaf;
	u64 ref_objectid;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;

	ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		/* no exact match for (bytenr, parent) */
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
	ref_objectid = btrfs_ref_objectid(leaf, ref);
	if (btrfs_ref_root(leaf, ref) != ref_root ||
	    btrfs_ref_generation(leaf, ref) != ref_generation ||
	    (ref_objectid != owner_objectid &&
	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
		/* a ref item exists but disagrees with the caller */
		ret = -EIO;
		WARN_ON(1);
		goto out;
	}
	ret = 0;
out:
	return ret;
}
573
/*
 * updates all the backrefs that are pending on update_list for the
 * extent_root
 *
 * For each pending_extent_op the matching ref item is located, its key
 * offset is changed from orig_parent to the new parent and its
 * generation is updated.  Consecutive refs that land in the same leaf
 * are handled without re-searching from the top of the tree.
 * Ops are unlinked, their extent range unlocked and freed as they are
 * processed.  Always returns 0; lookup failures BUG().
 */
static int noinline update_backrefs(struct btrfs_trans_handle *trans,
				    struct btrfs_root *extent_root,
				    struct btrfs_path *path,
				    struct list_head *update_list)
{
	struct btrfs_key key;
	struct btrfs_extent_ref *ref;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct pending_extent_op *op;
	struct extent_buffer *leaf;
	int ret = 0;
	struct list_head *cur = update_list->next;
	u64 ref_objectid;
	u64 ref_root = extent_root->root_key.objectid;

	op = list_entry(cur, struct pending_extent_op, list);

search:
	/* find the ref item keyed by the op's original parent */
	key.objectid = op->bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = op->orig_parent;

	ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
	BUG_ON(ret);

	leaf = path->nodes[0];

loop:
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);

	ref_objectid = btrfs_ref_objectid(leaf, ref);

	/* the ref we found must match the op we're applying */
	if (btrfs_ref_root(leaf, ref) != ref_root ||
	    btrfs_ref_generation(leaf, ref) != op->orig_generation ||
	    (ref_objectid != op->level &&
	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
		printk(KERN_ERR "couldn't find %Lu, parent %Lu, root %Lu, "
		       "owner %u\n", op->bytenr, op->orig_parent,
		       ref_root, op->level);
		btrfs_print_leaf(extent_root, leaf);
		BUG();
	}

	/* rewrite the key offset to the new parent and bump generation */
	key.objectid = op->bytenr;
	key.offset = op->parent;
	key.type = BTRFS_EXTENT_REF_KEY;
	ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
	BUG_ON(ret);
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
	btrfs_set_ref_generation(leaf, ref, op->generation);

	cur = cur->next;

	/* this op is fully applied; unlock its range and free it */
	list_del_init(&op->list);
	unlock_extent(&info->extent_ins, op->bytenr,
		      op->bytenr + op->num_bytes - 1, GFP_NOFS);
	kfree(op);

	if (cur == update_list) {
		btrfs_mark_buffer_dirty(path->nodes[0]);
		btrfs_release_path(extent_root, path);
		goto out;
	}

	op = list_entry(cur, struct pending_extent_op, list);

	/*
	 * see if the next op's ref item is further along in this same
	 * leaf, so we can skip the full tree search
	 */
	path->slots[0]++;
	while (path->slots[0] < btrfs_header_nritems(leaf)) {
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid == op->bytenr &&
		    key.type == BTRFS_EXTENT_REF_KEY)
			goto loop;
		path->slots[0]++;
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(extent_root, path);
	goto search;

out:
	return 0;
}
660
/*
 * Batch-insert extent items and their backrefs for the @nr ops on
 * @insert_list.  Two keys are built per op (one EXTENT_ITEM, one
 * EXTENT_REF) and inserted with btrfs_insert_some_items(), which may
 * insert fewer keys than requested per call; the loop below tracks how
 * far it got.  op->del counts the items inserted for an op; once it
 * reaches 2 the op's extent range is unlocked and the op freed.
 */
static int noinline insert_extents(struct btrfs_trans_handle *trans,
				   struct btrfs_root *extent_root,
				   struct btrfs_path *path,
				   struct list_head *insert_list, int nr)
{
	struct btrfs_key *keys;
	u32 *data_size;
	struct pending_extent_op *op;
	struct extent_buffer *leaf;
	struct list_head *cur = insert_list->next;
	struct btrfs_fs_info *info = extent_root->fs_info;
	u64 ref_root = extent_root->root_key.objectid;
	int i = 0, last = 0, ret;
	int total = nr * 2;

	if (!nr)
		return 0;

	keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
	if (!keys)
		return -ENOMEM;

	data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
	if (!data_size) {
		kfree(keys);
		return -ENOMEM;
	}

	/* build the key/size arrays: extent item then backref per op */
	list_for_each_entry(op, insert_list, list) {
		keys[i].objectid = op->bytenr;
		keys[i].offset = op->num_bytes;
		keys[i].type = BTRFS_EXTENT_ITEM_KEY;
		data_size[i] = sizeof(struct btrfs_extent_item);
		i++;

		keys[i].objectid = op->bytenr;
		keys[i].offset = op->parent;
		keys[i].type = BTRFS_EXTENT_REF_KEY;
		data_size[i] = sizeof(struct btrfs_extent_ref);
		i++;
	}

	op = list_entry(cur, struct pending_extent_op, list);
	i = 0;
	while (i < total) {
		int c;
		ret = btrfs_insert_some_items(trans, extent_root, path,
					      keys+i, data_size+i, total-i);
		BUG_ON(ret < 0);

		/* the final catch-up pass must insert exactly one item */
		if (last && ret > 1)
			BUG();

		leaf = path->nodes[0];
		for (c = 0; c < ret; c++) {
			int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;

			/*
			 * if the first item we inserted was a backref, then
			 * the EXTENT_ITEM will be the odd c's, else it will
			 * be the even c's
			 */
			if ((ref_first && (c % 2)) ||
			    (!ref_first && !(c % 2))) {
				struct btrfs_extent_item *itm;

				itm = btrfs_item_ptr(leaf, path->slots[0] + c,
						     struct btrfs_extent_item);
				btrfs_set_extent_refs(path->nodes[0], itm, 1);
				op->del++;
			} else {
				struct btrfs_extent_ref *ref;

				ref = btrfs_item_ptr(leaf, path->slots[0] + c,
						     struct btrfs_extent_ref);
				btrfs_set_ref_root(leaf, ref, ref_root);
				btrfs_set_ref_generation(leaf, ref,
							 op->generation);
				btrfs_set_ref_objectid(leaf, ref, op->level);
				btrfs_set_ref_num_refs(leaf, ref, 1);
				op->del++;
			}

			/*
			 * using del to see when its ok to free up the
			 * pending_extent_op.  In the case where we insert the
			 * last item on the list in order to help do batching
			 * we need to not free the extent op until we actually
			 * insert the extent_item
			 */
			if (op->del == 2) {
				unlock_extent(&info->extent_ins, op->bytenr,
					      op->bytenr + op->num_bytes - 1,
					      GFP_NOFS);
				cur = cur->next;
				list_del_init(&op->list);
				kfree(op);
				if (cur != insert_list)
					op = list_entry(cur,
						struct pending_extent_op,
						list);
			}
		}
		btrfs_mark_buffer_dirty(leaf);
		btrfs_release_path(extent_root, path);

		/*
		 * Ok backref's and items usually go right next to eachother,
		 * but if we could only insert 1 item that means that we
		 * inserted on the end of a leaf, and we have no idea what may
		 * be on the next leaf so we just play it safe.  In order to
		 * try and help this case we insert the last thing on our
		 * insert list so hopefully it will end up being the last
		 * thing on the leaf and everything else will be before it,
		 * which will let us insert a whole bunch of items at the same
		 * time.
		 */
		if (ret == 1 && !last && (i + ret < total)) {
			/*
			 * last: where we will pick up the next time around
			 * i: our current key to insert, will be total - 1
			 * cur: the current op we are screwing with
			 * op: duh
			 */
			last = i + ret;
			i = total - 1;
			cur = insert_list->prev;
			op = list_entry(cur, struct pending_extent_op, list);
		} else if (last) {
			/*
			 * ok we successfully inserted the last item on the
			 * list, lets reset everything
			 *
			 * i: our current key to insert, so where we left off
			 *    last time
			 * last: done with this
			 * cur: the op we are messing with
			 * op: duh
			 * total: since we inserted the last key, we need to
			 *        decrement total so we dont overflow
			 */
			i = last;
			last = 0;
			total--;
			if (i < total) {
				cur = insert_list->next;
				op = list_entry(cur, struct pending_extent_op,
						list);
			}
		} else {
			i += ret;
		}

		cond_resched();
	}
	ret = 0;
	kfree(keys);
	kfree(data_size);
	return ret;
}
821
/*
 * Insert (or bump) a backref item for the extent at @bytenr.
 *
 * Backrefs are keyed as (bytenr, BTRFS_EXTENT_REF_KEY, parent).  If no
 * item with that key exists yet, a fresh one is created recording
 * @ref_root, @ref_generation and @owner_objectid with a ref count of 1.
 * If the insert reports -EEXIST, the existing item's count is bumped
 * instead; its stored root/generation must match ours (otherwise -EIO),
 * and if a different owner already claimed the ref, the owner field is
 * collapsed to BTRFS_MULTIPLE_OBJECTIDS.
 *
 * Returns 0 on success, -EIO on a root/generation mismatch, or the error
 * from btrfs_insert_empty_item.  @path is always released on return.
 */
static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 ref_root, u64 ref_generation,
					  u64 owner_objectid)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *ref;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
	if (ret == 0) {
		/* brand new backref: fill it in with a single reference */
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		btrfs_set_ref_root(leaf, ref, ref_root);
		btrfs_set_ref_generation(leaf, ref, ref_generation);
		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
		btrfs_set_ref_num_refs(leaf, ref, 1);
	} else if (ret == -EEXIST) {
		u64 existing_owner;
		/*
		 * only file-data extents (owner >= FIRST_FREE_OBJECTID)
		 * may share a backref item; tree blocks never should
		 */
		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		if (btrfs_ref_root(leaf, ref) != ref_root ||
		    btrfs_ref_generation(leaf, ref) != ref_generation) {
			/* existing ref disagrees with us: on-disk damage */
			ret = -EIO;
			WARN_ON(1);
			goto out;
		}

		num_refs = btrfs_ref_num_refs(leaf, ref);
		BUG_ON(num_refs == 0);
		btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);

		/* more than one distinct owner: record that fact */
		existing_owner = btrfs_ref_objectid(leaf, ref);
		if (existing_owner != owner_objectid &&
		    existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
			btrfs_set_ref_objectid(leaf, ref,
					BTRFS_MULTIPLE_OBJECTIDS);
		}
		ret = 0;
	} else {
		/* some other insert error: propagate it */
		goto out;
	}
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
}
880
881 static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
882                                           struct btrfs_root *root,
883                                           struct btrfs_path *path)
884 {
885         struct extent_buffer *leaf;
886         struct btrfs_extent_ref *ref;
887         u32 num_refs;
888         int ret = 0;
889
890         leaf = path->nodes[0];
891         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
892         num_refs = btrfs_ref_num_refs(leaf, ref);
893         BUG_ON(num_refs == 0);
894         num_refs -= 1;
895         if (num_refs == 0) {
896                 ret = btrfs_del_item(trans, root, path);
897         } else {
898                 btrfs_set_ref_num_refs(leaf, ref, num_refs);
899                 btrfs_mark_buffer_dirty(leaf);
900         }
901         btrfs_release_path(root, path);
902         return ret;
903 }
904
#ifdef BIO_RW_DISCARD
/*
 * Ask the block device to discard (TRIM) the byte range [start, start+len).
 * @start and @len are in bytes; the block layer wants 512-byte sectors,
 * hence the >> 9.  blkdev_issue_discard grew a gfp_mask argument in
 * 2.6.28, which is what the version check selects on.
 */
static void btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
#else
	blkdev_issue_discard(bdev, start >> 9, len >> 9);
#endif
}
#endif
916
/*
 * Process a list of pending extent deletions (@del_list) against the
 * extent tree.
 *
 * For each pending op we look up its backref, and check whether the
 * extent item sits in the slot directly before it so both can be removed
 * with a single btrfs_del_items call.  When an extent's ref count drops
 * to zero we additionally scan forward in the same leaf for more
 * extent+backref pairs from the list that are adjacent and singly
 * referenced, batching them all into one delete.  Freed extents are
 * pinned (or immediately released, per pin_down_bytes's return), block
 * group and superblock byte counters are updated, and each finished op
 * is unlocked in the extent_ins tree and freed.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the
 * lookup error when a backref cannot be found (after WARNing).
 */
static int noinline free_extents(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root,
				 struct list_head *del_list)
{
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	struct list_head *cur;
	struct pending_extent_op *op;
	struct btrfs_extent_item *ei;
	int ret, num_to_del, extent_slot = 0, found_extent = 0;
	u32 refs;
	u64 bytes_freed = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = 1;

search:
	/* search for the backref for the current ref we want to delete */
	cur = del_list->next;
	op = list_entry(cur, struct pending_extent_op, list);
	ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
				    op->orig_parent,
				    extent_root->root_key.objectid,
				    op->orig_generation, op->level, 1);
	if (ret) {
		printk("Unable to find backref byte nr %Lu root %Lu gen %Lu "
		       "owner %u\n", op->bytenr,
		       extent_root->root_key.objectid, op->orig_generation,
		       op->level);
		btrfs_print_leaf(extent_root, path->nodes[0]);
		WARN_ON(1);
		goto out;
	}

	extent_slot = path->slots[0];
	num_to_del = 1;
	found_extent = 0;

	/*
	 * if we aren't the first item on the leaf we can move back one and see
	 * if our ref is right next to our extent item
	 */
	if (likely(extent_slot)) {
		extent_slot--;
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      extent_slot);
		if (found_key.objectid == op->bytenr &&
		    found_key.type == BTRFS_EXTENT_ITEM_KEY &&
		    found_key.offset == op->num_bytes) {
			num_to_del++;
			found_extent = 1;
		}
	}

	/*
	 * if we didn't find the extent we need to delete the backref and then
	 * search for the extent item key so we can update its ref count
	 */
	if (!found_extent) {
		key.objectid = op->bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = op->num_bytes;

		ret = remove_extent_backref(trans, extent_root, path);
		BUG_ON(ret);
		btrfs_release_path(extent_root, path);
		ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
		BUG_ON(ret);
		extent_slot = path->slots[0];
	}

	/* this is where we update the ref count for the extent */
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs == 0);
	refs--;
	btrfs_set_extent_refs(leaf, ei, refs);

	btrfs_mark_buffer_dirty(leaf);

	/*
	 * This extent needs deleting.  The reason cur_slot is extent_slot +
	 * num_to_del is because extent_slot points to the slot where the extent
	 * is, and if the backref was not right next to the extent we will be
	 * deleting at least 1 item, and will want to start searching at the
	 * slot directly next to extent_slot.  However if we did find the
	 * backref next to the extent item then we will be deleting at least 2
	 * items and will want to start searching directly after the ref slot
	 */
	if (!refs) {
		struct list_head *pos, *n, *end;
		int cur_slot = extent_slot+num_to_del;
		u64 super_used;
		u64 root_used;

		path->slots[0] = extent_slot;
		bytes_freed = op->num_bytes;

		mutex_lock(&info->pinned_mutex);
		ret = pin_down_bytes(trans, extent_root, op->bytenr,
				     op->num_bytes, op->level >=
				     BTRFS_FIRST_FREE_OBJECTID);
		mutex_unlock(&info->pinned_mutex);
		BUG_ON(ret < 0);
		op->del = ret;

		/*
		 * we need to see if we can delete multiple things at once, so
		 * start looping through the list of extents we are wanting to
		 * delete and see if their extent/backref's are right next to
		 * each other and the extents only have 1 ref
		 */
		for (pos = cur->next; pos != del_list; pos = pos->next) {
			struct pending_extent_op *tmp;

			tmp = list_entry(pos, struct pending_extent_op, list);

			/* we only want to delete extent+ref at this stage */
			if (cur_slot >= btrfs_header_nritems(leaf) - 1)
				break;

			btrfs_item_key_to_cpu(leaf, &found_key, cur_slot);
			if (found_key.objectid != tmp->bytenr ||
			    found_key.type != BTRFS_EXTENT_ITEM_KEY ||
			    found_key.offset != tmp->num_bytes)
				break;

			/* check to make sure this extent only has one ref */
			ei = btrfs_item_ptr(leaf, cur_slot,
					    struct btrfs_extent_item);
			if (btrfs_extent_refs(leaf, ei) != 1)
				break;

			btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1);
			if (found_key.objectid != tmp->bytenr ||
			    found_key.type != BTRFS_EXTENT_REF_KEY ||
			    found_key.offset != tmp->orig_parent)
				break;

			/*
			 * the ref is right next to the extent, we can set the
			 * ref count to 0 since we will delete them both now
			 */
			btrfs_set_extent_refs(leaf, ei, 0);

			/* pin down the bytes for this extent */
			mutex_lock(&info->pinned_mutex);
			ret = pin_down_bytes(trans, extent_root, tmp->bytenr,
					     tmp->num_bytes, tmp->level >=
					     BTRFS_FIRST_FREE_OBJECTID);
			mutex_unlock(&info->pinned_mutex);
			BUG_ON(ret < 0);

			/*
			 * use the del field to tell if we need to go ahead and
			 * free up the extent when we delete the item or not.
			 */
			tmp->del = ret;
			bytes_freed += tmp->num_bytes;

			num_to_del += 2;
			cur_slot += 2;
		}
		end = pos;

		/* update the free space counters */
		spin_lock_irq(&info->delalloc_lock);
		super_used = btrfs_super_bytes_used(&info->super_copy);
		btrfs_set_super_bytes_used(&info->super_copy,
					   super_used - bytes_freed);
		spin_unlock_irq(&info->delalloc_lock);

		root_used = btrfs_root_used(&extent_root->root_item);
		btrfs_set_root_used(&extent_root->root_item,
				    root_used - bytes_freed);

		/* delete the items */
		ret = btrfs_del_items(trans, extent_root, path,
				      path->slots[0], num_to_del);
		BUG_ON(ret);

		/*
		 * loop through the extents we deleted and do the cleanup work
		 * on them
		 */
		for (pos = cur, n = pos->next; pos != end;
		     pos = n, n = pos->next) {
			struct pending_extent_op *tmp;
#ifdef BIO_RW_DISCARD
			u64 map_length;
			struct btrfs_multi_bio *multi = NULL;
#endif
			tmp = list_entry(pos, struct pending_extent_op, list);

			/*
			 * remember tmp->del tells us whether or not we pinned
			 * down the extent
			 */
			ret = update_block_group(trans, extent_root,
						 tmp->bytenr, tmp->num_bytes, 0,
						 tmp->del);
			BUG_ON(ret);

#ifdef BIO_RW_DISCARD
			/* issue a TRIM for every physical stripe backing it */
			map_length = tmp->num_bytes;
			ret = btrfs_map_block(&info->mapping_tree, READ,
					      tmp->bytenr, &map_length, &multi,
					      0);
			if (!ret) {
				struct btrfs_bio_stripe *stripe;
				int i;

				stripe = multi->stripes;

				if (map_length > tmp->num_bytes)
					map_length = tmp->num_bytes;

				for (i = 0; i < multi->num_stripes;
				     i++, stripe++)
					btrfs_issue_discard(stripe->dev->bdev,
							    stripe->physical,
							    map_length);
				kfree(multi);
			}
#endif
			list_del_init(&tmp->list);
			unlock_extent(&info->extent_ins, tmp->bytenr,
				      tmp->bytenr + tmp->num_bytes - 1,
				      GFP_NOFS);
			kfree(tmp);
		}
	} else if (refs && found_extent) {
		/*
		 * the ref and extent were right next to each other, but the
		 * extent still has a ref, so just free the backref and keep
		 * going
		 */
		ret = remove_extent_backref(trans, extent_root, path);
		BUG_ON(ret);

		list_del_init(&op->list);
		unlock_extent(&info->extent_ins, op->bytenr,
			      op->bytenr + op->num_bytes - 1, GFP_NOFS);
		kfree(op);
	} else {
		/*
		 * the extent has multiple refs and the backref we were looking
		 * for was not right next to it, so just unlock and go next,
		 * we're good to go
		 */
		list_del_init(&op->list);
		unlock_extent(&info->extent_ins, op->bytenr,
			      op->bytenr + op->num_bytes - 1, GFP_NOFS);
		kfree(op);
	}

	btrfs_release_path(extent_root, path);
	if (!list_empty(del_list))
		goto search;

out:
	btrfs_free_path(path);
	return ret;
}
1186
/*
 * Move a backref for @bytenr from (orig_parent, orig_generation) to
 * (parent, ref_generation).
 *
 * For the extent root itself we cannot touch the extent tree directly
 * (we may already be modifying it), so the update is queued instead: if
 * a pending op already covers this range in the extent_ins tree it is
 * edited in place, otherwise a new PENDING_BACKREF_UPDATE op is attached
 * to the range.  For all other roots the old backref is removed and the
 * new one inserted immediately, then any pending inserts/deletes are
 * flushed.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the
 * error from the backref lookup/removal.
 */
static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 bytenr,
				     u64 orig_parent, u64 parent,
				     u64 orig_root, u64 ref_root,
				     u64 orig_generation, u64 ref_generation,
				     u64 owner_objectid)
{
	int ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path *path;

	if (root == root->fs_info->extent_root) {
		struct pending_extent_op *extent_op;
		u64 num_bytes;

		/* for tree blocks owner_objectid is really the level */
		BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
		num_bytes = btrfs_level_size(root, (int)owner_objectid);
		mutex_lock(&root->fs_info->extent_ins_mutex);
		if (test_range_bit(&root->fs_info->extent_ins, bytenr,
				bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
			u64 priv;
			/* an op is already queued for this range: edit it */
			ret = get_state_private(&root->fs_info->extent_ins,
						bytenr, &priv);
			BUG_ON(ret);
			extent_op = (struct pending_extent_op *)
							(unsigned long)priv;
			BUG_ON(extent_op->parent != orig_parent);
			BUG_ON(extent_op->generation != orig_generation);

			extent_op->parent = parent;
			extent_op->generation = ref_generation;
		} else {
			/* queue a fresh backref-update op on the range */
			extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
			BUG_ON(!extent_op);

			extent_op->type = PENDING_BACKREF_UPDATE;
			extent_op->bytenr = bytenr;
			extent_op->num_bytes = num_bytes;
			extent_op->parent = parent;
			extent_op->orig_parent = orig_parent;
			extent_op->generation = ref_generation;
			extent_op->orig_generation = orig_generation;
			extent_op->level = (int)owner_objectid;
			INIT_LIST_HEAD(&extent_op->list);
			extent_op->del = 0;

			set_extent_bits(&root->fs_info->extent_ins,
					bytenr, bytenr + num_bytes - 1,
					EXTENT_WRITEBACK, GFP_NOFS);
			set_state_private(&root->fs_info->extent_ins,
					  bytenr, (unsigned long)extent_op);
		}
		mutex_unlock(&root->fs_info->extent_ins_mutex);
		return 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = lookup_extent_backref(trans, extent_root, path,
				    bytenr, orig_parent, orig_root,
				    orig_generation, owner_objectid, 1);
	if (ret)
		goto out;
	ret = remove_extent_backref(trans, extent_root, path);
	if (ret)
		goto out;
	ret = insert_extent_backref(trans, extent_root, path, bytenr,
				    parent, ref_root, ref_generation,
				    owner_objectid);
	BUG_ON(ret);
	finish_current_insert(trans, extent_root, 0);
	del_pending_extents(trans, extent_root, 0);
out:
	btrfs_free_path(path);
	return ret;
}
1264
1265 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1266                             struct btrfs_root *root, u64 bytenr,
1267                             u64 orig_parent, u64 parent,
1268                             u64 ref_root, u64 ref_generation,
1269                             u64 owner_objectid)
1270 {
1271         int ret;
1272         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1273             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1274                 return 0;
1275         ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
1276                                         parent, ref_root, ref_root,
1277                                         ref_generation, ref_generation,
1278                                         owner_objectid);
1279         return ret;
1280 }
1281
1282 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1283                                   struct btrfs_root *root, u64 bytenr,
1284                                   u64 orig_parent, u64 parent,
1285                                   u64 orig_root, u64 ref_root,
1286                                   u64 orig_generation, u64 ref_generation,
1287                                   u64 owner_objectid)
1288 {
1289         struct btrfs_path *path;
1290         int ret;
1291         struct btrfs_key key;
1292         struct extent_buffer *l;
1293         struct btrfs_extent_item *item;
1294         u32 refs;
1295
1296         path = btrfs_alloc_path();
1297         if (!path)
1298                 return -ENOMEM;
1299
1300         path->reada = 1;
1301         key.objectid = bytenr;
1302         key.type = BTRFS_EXTENT_ITEM_KEY;
1303         key.offset = (u64)-1;
1304
1305         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1306                                 0, 1);
1307         if (ret < 0)
1308                 return ret;
1309         BUG_ON(ret == 0 || path->slots[0] == 0);
1310
1311         path->slots[0]--;
1312         l = path->nodes[0];
1313
1314         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1315         if (key.objectid != bytenr) {
1316                 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
1317                 printk("wanted %Lu found %Lu\n", bytenr, key.objectid);
1318                 BUG();
1319         }
1320         BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
1321
1322         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1323         refs = btrfs_extent_refs(l, item);
1324         btrfs_set_extent_refs(l, item, refs + 1);
1325         btrfs_mark_buffer_dirty(path->nodes[0]);
1326
1327         btrfs_release_path(root->fs_info->extent_root, path);
1328
1329         path->reada = 1;
1330         ret = insert_extent_backref(trans, root->fs_info->extent_root,
1331                                     path, bytenr, parent,
1332                                     ref_root, ref_generation,
1333                                     owner_objectid);
1334         BUG_ON(ret);
1335         finish_current_insert(trans, root->fs_info->extent_root, 0);
1336         del_pending_extents(trans, root->fs_info->extent_root, 0);
1337
1338         btrfs_free_path(path);
1339         return 0;
1340 }
1341
1342 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1343                          struct btrfs_root *root,
1344                          u64 bytenr, u64 num_bytes, u64 parent,
1345                          u64 ref_root, u64 ref_generation,
1346                          u64 owner_objectid)
1347 {
1348         int ret;
1349         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1350             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1351                 return 0;
1352         ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
1353                                      0, ref_root, 0, ref_generation,
1354                                      owner_objectid);
1355         return ret;
1356 }
1357
1358 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1359                          struct btrfs_root *root)
1360 {
1361         finish_current_insert(trans, root->fs_info->extent_root, 1);
1362         del_pending_extents(trans, root->fs_info->extent_root, 1);
1363         return 0;
1364 }
1365
1366 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
1367                             struct btrfs_root *root, u64 bytenr,
1368                             u64 num_bytes, u32 *refs)
1369 {
1370         struct btrfs_path *path;
1371         int ret;
1372         struct btrfs_key key;
1373         struct extent_buffer *l;
1374         struct btrfs_extent_item *item;
1375
1376         WARN_ON(num_bytes < root->sectorsize);
1377         path = btrfs_alloc_path();
1378         path->reada = 1;
1379         key.objectid = bytenr;
1380         key.offset = num_bytes;
1381         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1382         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1383                                 0, 0);
1384         if (ret < 0)
1385                 goto out;
1386         if (ret != 0) {
1387                 btrfs_print_leaf(root, path->nodes[0]);
1388                 printk("failed to find block number %Lu\n", bytenr);
1389                 BUG();
1390         }
1391         l = path->nodes[0];
1392         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1393         *refs = btrfs_extent_refs(l, item);
1394 out:
1395         btrfs_free_path(path);
1396         return 0;
1397 }
1398
1399 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1400                           struct btrfs_root *root, u64 bytenr)
1401 {
1402         struct btrfs_root *extent_root = root->fs_info->extent_root;
1403         struct btrfs_path *path;
1404         struct extent_buffer *leaf;
1405         struct btrfs_extent_ref *ref_item;
1406         struct btrfs_key key;
1407         struct btrfs_key found_key;
1408         u64 ref_root;
1409         u64 last_snapshot;
1410         u32 nritems;
1411         int ret;
1412
1413         key.objectid = bytenr;
1414         key.offset = (u64)-1;
1415         key.type = BTRFS_EXTENT_ITEM_KEY;
1416
1417         path = btrfs_alloc_path();
1418         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1419         if (ret < 0)
1420                 goto out;
1421         BUG_ON(ret == 0);
1422
1423         ret = -ENOENT;
1424         if (path->slots[0] == 0)
1425                 goto out;
1426
1427         path->slots[0]--;
1428         leaf = path->nodes[0];
1429         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1430
1431         if (found_key.objectid != bytenr ||
1432             found_key.type != BTRFS_EXTENT_ITEM_KEY)
1433                 goto out;
1434
1435         last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1436         while (1) {
1437                 leaf = path->nodes[0];
1438                 nritems = btrfs_header_nritems(leaf);
1439                 if (path->slots[0] >= nritems) {
1440                         ret = btrfs_next_leaf(extent_root, path);
1441                         if (ret < 0)
1442                                 goto out;
1443                         if (ret == 0)
1444                                 continue;
1445                         break;
1446                 }
1447                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1448                 if (found_key.objectid != bytenr)
1449                         break;
1450
1451                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
1452                         path->slots[0]++;
1453                         continue;
1454                 }
1455
1456                 ref_item = btrfs_item_ptr(leaf, path->slots[0],
1457                                           struct btrfs_extent_ref);
1458                 ref_root = btrfs_ref_root(leaf, ref_item);
1459                 if (ref_root != root->root_key.objectid &&
1460                     ref_root != BTRFS_TREE_LOG_OBJECTID) {
1461                         ret = 1;
1462                         goto out;
1463                 }
1464                 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
1465                         ret = 1;
1466                         goto out;
1467                 }
1468
1469                 path->slots[0]++;
1470         }
1471         ret = 0;
1472 out:
1473         btrfs_free_path(path);
1474         return ret;
1475 }
1476
/*
 * Populate the leaf-ref cache for @buf with its @nr_extents file extents.
 *
 * Only does anything for reference-counted roots and only for leaves
 * (level 0).  A btrfs_leaf_ref is allocated and filled with one
 * btrfs_extent_info per real (non-inline, non-hole) file extent in the
 * leaf, then added to the root's ref cache.  For the relocation tree the
 * ref is marked shared and an existing cached entry for the same bytenr
 * is replaced on -EEXIST.
 *
 * Returns 0 on success or -ENOMEM if the leaf ref cannot be allocated.
 */
int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct extent_buffer *buf, u32 nr_extents)
{
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	u64 root_gen;
	u32 nritems;
	int i;
	int level;
	int ret = 0;
	int shared = 0;

	if (!root->ref_cows)
		return 0;

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		shared = 0;
		root_gen = root->root_key.offset;
	} else {
		/* reloc tree: entries may be shared across transactions */
		shared = 1;
		root_gen = trans->transid - 1;
	}

	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);

	if (level == 0) {
		struct btrfs_leaf_ref *ref;
		struct btrfs_extent_info *info;

		ref = btrfs_alloc_leaf_ref(root, nr_extents);
		if (!ref) {
			ret = -ENOMEM;
			goto out;
		}

		ref->root_gen = root_gen;
		ref->bytenr = buf->start;
		ref->owner = btrfs_header_owner(buf);
		ref->generation = btrfs_header_generation(buf);
		ref->nritems = nr_extents;
		info = ref->extents;

		/* record every real on-disk file extent in this leaf */
		for (i = 0; nr_extents > 0 && i < nritems; i++) {
			u64 disk_bytenr;
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
				continue;

			info->bytenr = disk_bytenr;
			info->num_bytes =
				btrfs_file_extent_disk_num_bytes(buf, fi);
			info->objectid = key.objectid;
			info->offset = key.offset;
			info++;
		}

		ret = btrfs_add_leaf_ref(root, ref, shared);
		if (ret == -EEXIST && shared) {
			/* shared entry already cached: replace it with ours */
			struct btrfs_leaf_ref *old;
			old = btrfs_lookup_leaf_ref(root, ref->bytenr);
			BUG_ON(!old);
			btrfs_remove_leaf_ref(root, old);
			btrfs_free_leaf_ref(root, old);
			ret = btrfs_add_leaf_ref(root, ref, shared);
		}
		WARN_ON(ret);
		btrfs_free_leaf_ref(root, ref);
	}
out:
	return ret;
}
1557
1558 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1559                   struct extent_buffer *orig_buf, struct extent_buffer *buf,
1560                   u32 *nr_extents)
1561 {
1562         u64 bytenr;
1563         u64 ref_root;
1564         u64 orig_root;
1565         u64 ref_generation;
1566         u64 orig_generation;
1567         u32 nritems;
1568         u32 nr_file_extents = 0;
1569         struct btrfs_key key;
1570         struct btrfs_file_extent_item *fi;
1571         int i;
1572         int level;
1573         int ret = 0;
1574         int faili = 0;
1575         int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1576                             u64, u64, u64, u64, u64, u64, u64, u64);
1577
1578         ref_root = btrfs_header_owner(buf);
1579         ref_generation = btrfs_header_generation(buf);
1580         orig_root = btrfs_header_owner(orig_buf);
1581         orig_generation = btrfs_header_generation(orig_buf);
1582
1583         nritems = btrfs_header_nritems(buf);
1584         level = btrfs_header_level(buf);
1585
1586         if (root->ref_cows) {
1587                 process_func = __btrfs_inc_extent_ref;
1588         } else {
1589                 if (level == 0 &&
1590                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1591                         goto out;
1592                 if (level != 0 &&
1593                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1594                         goto out;
1595                 process_func = __btrfs_update_extent_ref;
1596         }
1597
1598         for (i = 0; i < nritems; i++) {
1599                 cond_resched();
1600                 if (level == 0) {
1601                         btrfs_item_key_to_cpu(buf, &key, i);
1602                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1603                                 continue;
1604                         fi = btrfs_item_ptr(buf, i,
1605                                             struct btrfs_file_extent_item);
1606                         if (btrfs_file_extent_type(buf, fi) ==
1607                             BTRFS_FILE_EXTENT_INLINE)
1608                                 continue;
1609                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1610                         if (bytenr == 0)
1611                                 continue;
1612
1613                         nr_file_extents++;
1614
1615                         ret = process_func(trans, root, bytenr,
1616                                            orig_buf->start, buf->start,
1617                                            orig_root, ref_root,
1618                                            orig_generation, ref_generation,
1619                                            key.objectid);
1620
1621                         if (ret) {
1622                                 faili = i;
1623                                 WARN_ON(1);
1624                                 goto fail;
1625                         }
1626                 } else {
1627                         bytenr = btrfs_node_blockptr(buf, i);
1628                         ret = process_func(trans, root, bytenr,
1629                                            orig_buf->start, buf->start,
1630                                            orig_root, ref_root,
1631                                            orig_generation, ref_generation,
1632                                            level - 1);
1633                         if (ret) {
1634                                 faili = i;
1635                                 WARN_ON(1);
1636                                 goto fail;
1637                         }
1638                 }
1639         }
1640 out:
1641         if (nr_extents) {
1642                 if (level == 0)
1643                         *nr_extents = nr_file_extents;
1644                 else
1645                         *nr_extents = nritems;
1646         }
1647         return 0;
1648 fail:
1649         WARN_ON(1);
1650         return ret;
1651 }
1652
1653 int btrfs_update_ref(struct btrfs_trans_handle *trans,
1654                      struct btrfs_root *root, struct extent_buffer *orig_buf,
1655                      struct extent_buffer *buf, int start_slot, int nr)
1656
1657 {
1658         u64 bytenr;
1659         u64 ref_root;
1660         u64 orig_root;
1661         u64 ref_generation;
1662         u64 orig_generation;
1663         struct btrfs_key key;
1664         struct btrfs_file_extent_item *fi;
1665         int i;
1666         int ret;
1667         int slot;
1668         int level;
1669
1670         BUG_ON(start_slot < 0);
1671         BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1672
1673         ref_root = btrfs_header_owner(buf);
1674         ref_generation = btrfs_header_generation(buf);
1675         orig_root = btrfs_header_owner(orig_buf);
1676         orig_generation = btrfs_header_generation(orig_buf);
1677         level = btrfs_header_level(buf);
1678
1679         if (!root->ref_cows) {
1680                 if (level == 0 &&
1681                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1682                         return 0;
1683                 if (level != 0 &&
1684                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1685                         return 0;
1686         }
1687
1688         for (i = 0, slot = start_slot; i < nr; i++, slot++) {
1689                 cond_resched();
1690                 if (level == 0) {
1691                         btrfs_item_key_to_cpu(buf, &key, slot);
1692                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1693                                 continue;
1694                         fi = btrfs_item_ptr(buf, slot,
1695                                             struct btrfs_file_extent_item);
1696                         if (btrfs_file_extent_type(buf, fi) ==
1697                             BTRFS_FILE_EXTENT_INLINE)
1698                                 continue;
1699                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1700                         if (bytenr == 0)
1701                                 continue;
1702                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1703                                             orig_buf->start, buf->start,
1704                                             orig_root, ref_root,
1705                                             orig_generation, ref_generation,
1706                                             key.objectid);
1707                         if (ret)
1708                                 goto fail;
1709                 } else {
1710                         bytenr = btrfs_node_blockptr(buf, slot);
1711                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1712                                             orig_buf->start, buf->start,
1713                                             orig_root, ref_root,
1714                                             orig_generation, ref_generation,
1715                                             level - 1);
1716                         if (ret)
1717                                 goto fail;
1718                 }
1719         }
1720         return 0;
1721 fail:
1722         WARN_ON(1);
1723         return -1;
1724 }
1725
/*
 * Write the in-memory block group item for @cache back into the extent
 * tree.  Runs any extent inserts/deletes that became pending as a side
 * effect before returning.
 *
 * Returns 0 on success, or the first error from the search or from the
 * pending-extent processing.
 */
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	unsigned long bi;
	struct extent_buffer *leaf;

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	if (ret < 0)
		goto fail;
	/* ret > 0 means the block group item vanished: that's fatal */
	BUG_ON(ret);

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(extent_root, path);
fail:
	/*
	 * the fail label is also the success path: pending extent work is
	 * flushed regardless, and a search error takes precedence over any
	 * error from the pending processing
	 */
	finish_current_insert(trans, extent_root, 0);
	pending_ret = del_pending_extents(trans, extent_root, 0);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}
1757
/*
 * Write every dirty block group item back to the extent tree.
 *
 * Repeatedly scans the block group rbtree for the first dirty entry and
 * writes it out; finishes when no dirty entry remains.  Returns the last
 * write error seen (werr), or 0.
 */
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache, *entry;
	struct rb_node *n;
	int err = 0;
	int werr = 0;
	struct btrfs_path *path;
	/* NOTE(review): 'last' is accumulated below but never read -- looks
	 * like a leftover from an older search-by-offset loop; confirm */
	u64 last = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
		cache = NULL;
		/* find the first dirty group; lock only protects the tree walk */
		spin_lock(&root->fs_info->block_group_cache_lock);
		for (n = rb_first(&root->fs_info->block_group_cache_tree);
		     n; n = rb_next(n)) {
			entry = rb_entry(n, struct btrfs_block_group_cache,
					 cache_node);
			if (entry->dirty) {
				cache = entry;
				break;
			}
		}
		spin_unlock(&root->fs_info->block_group_cache_lock);

		if (!cache)
			break;

		/* clear dirty before writing so we make forward progress */
		cache->dirty = 0;
		last += cache->key.offset;

		err = write_one_cache_group(trans, root,
					    path, cache);
		/*
		 * if we fail to write the cache group, we want
		 * to keep it marked dirty in hopes that a later
		 * write will work
		 */
		/* NOTE(review): the comment above does not match the code --
		 * 'dirty' was already cleared and is not restored here, so a
		 * failed group is never retried.  Re-marking it dirty would
		 * risk an infinite loop with this first-dirty scan; needs a
		 * deliberate fix, flagging rather than changing behavior. */
		if (err) {
			werr = err;
			continue;
		}
	}
	btrfs_free_path(path);
	return werr;
}
1807
1808 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1809                              u64 total_bytes, u64 bytes_used,
1810                              struct btrfs_space_info **space_info)
1811 {
1812         struct btrfs_space_info *found;
1813
1814         found = __find_space_info(info, flags);
1815         if (found) {
1816                 spin_lock(&found->lock);
1817                 found->total_bytes += total_bytes;
1818                 found->bytes_used += bytes_used;
1819                 found->full = 0;
1820                 spin_unlock(&found->lock);
1821                 *space_info = found;
1822                 return 0;
1823         }
1824         found = kzalloc(sizeof(*found), GFP_NOFS);
1825         if (!found)
1826                 return -ENOMEM;
1827
1828         list_add(&found->list, &info->space_info);
1829         INIT_LIST_HEAD(&found->block_groups);
1830         init_rwsem(&found->groups_sem);
1831         spin_lock_init(&found->lock);
1832         found->flags = flags;
1833         found->total_bytes = total_bytes;
1834         found->bytes_used = bytes_used;
1835         found->bytes_pinned = 0;
1836         found->bytes_reserved = 0;
1837         found->bytes_readonly = 0;
1838         found->full = 0;
1839         found->force_alloc = 0;
1840         *space_info = found;
1841         return 0;
1842 }
1843
1844 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1845 {
1846         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1847                                    BTRFS_BLOCK_GROUP_RAID1 |
1848                                    BTRFS_BLOCK_GROUP_RAID10 |
1849                                    BTRFS_BLOCK_GROUP_DUP);
1850         if (extra_flags) {
1851                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1852                         fs_info->avail_data_alloc_bits |= extra_flags;
1853                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1854                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1855                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1856                         fs_info->avail_system_alloc_bits |= extra_flags;
1857         }
1858 }
1859
/*
 * Mark a block group read-only and credit its unused space to the
 * space_info's bytes_readonly counter.
 *
 * Lock order: space_info->lock is taken before the group's own lock,
 * matching the rest of this file.
 */
static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
{
	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);
	if (!cache->ro) {
		/* only the free space in the group becomes readonly bytes */
		cache->space_info->bytes_readonly += cache->key.offset -
					btrfs_block_group_used(&cache->item);
		cache->ro = 1;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);
}
1872
1873 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1874 {
1875         u64 num_devices = root->fs_info->fs_devices->rw_devices;
1876
1877         if (num_devices == 1)
1878                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1879         if (num_devices < 4)
1880                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1881
1882         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1883             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1884                       BTRFS_BLOCK_GROUP_RAID10))) {
1885                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1886         }
1887
1888         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1889             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1890                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1891         }
1892
1893         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1894             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1895              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1896              (flags & BTRFS_BLOCK_GROUP_DUP)))
1897                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1898         return flags;
1899 }
1900
1901 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1902                           struct btrfs_root *extent_root, u64 alloc_bytes,
1903                           u64 flags, int force)
1904 {
1905         struct btrfs_space_info *space_info;
1906         u64 thresh;
1907         int ret = 0;
1908
1909         mutex_lock(&extent_root->fs_info->chunk_mutex);
1910
1911         flags = btrfs_reduce_alloc_profile(extent_root, flags);
1912
1913         space_info = __find_space_info(extent_root->fs_info, flags);
1914         if (!space_info) {
1915                 ret = update_space_info(extent_root->fs_info, flags,
1916                                         0, 0, &space_info);
1917                 BUG_ON(ret);
1918         }
1919         BUG_ON(!space_info);
1920
1921         spin_lock(&space_info->lock);
1922         if (space_info->force_alloc) {
1923                 force = 1;
1924                 space_info->force_alloc = 0;
1925         }
1926         if (space_info->full) {
1927                 spin_unlock(&space_info->lock);
1928                 goto out;
1929         }
1930
1931         thresh = space_info->total_bytes - space_info->bytes_readonly;
1932         thresh = div_factor(thresh, 6);
1933         if (!force &&
1934            (space_info->bytes_used + space_info->bytes_pinned +
1935             space_info->bytes_reserved + alloc_bytes) < thresh) {
1936                 spin_unlock(&space_info->lock);
1937                 goto out;
1938         }
1939         spin_unlock(&space_info->lock);
1940
1941         ret = btrfs_alloc_chunk(trans, extent_root, flags);
1942         if (ret) {
1943 printk("space info full %Lu\n", flags);
1944                 space_info->full = 1;
1945         }
1946 out:
1947         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1948         return ret;
1949 }
1950
/*
 * Adjust the used-byte accounting for every block group touched by the
 * range [@bytenr, @bytenr + @num_bytes).
 *
 * @alloc:     nonzero to account an allocation, zero for a free
 * @mark_free: on free, also return the range to the free space cache
 *
 * Returns 0 on success, -1 if a block group lookup or free-space insert
 * fails.  The loop clamps each step to the end of the current group so a
 * range spanning several groups is handled piecewise.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;

	while(total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache)
			return -1;
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		/* space_info lock before group lock, per the file-wide order */
		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);
		cache->dirty = 1;
		old_val = btrfs_block_group_used(&cache->item);
		/* only account the part of the range inside this group */
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			cache->space_info->bytes_used += num_bytes;
			if (cache->ro) {
				/* allocating from a readonly group is a bug */
				cache->space_info->bytes_readonly -= num_bytes;
				WARN_ON(1);
			}
			btrfs_set_block_group_used(&cache->item, old_val);
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
		} else {
			old_val -= num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			if (cache->ro)
				cache->space_info->bytes_readonly += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			if (mark_free) {
				int ret;
				/* hand the bytes back to the free space cache */
				ret = btrfs_add_free_space(cache, bytenr,
							   num_bytes);
				if (ret)
					return -1;
			}
		}
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
2005
2006 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
2007 {
2008         struct btrfs_block_group_cache *cache;
2009
2010         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
2011         if (!cache)
2012                 return 0;
2013
2014         return cache->key.objectid;
2015 }
2016
/*
 * Pin or unpin the byte range [@bytenr, @bytenr + @num) and keep the
 * per-block-group and per-space-info pinned counters in sync with the
 * pinned_extents io tree.
 *
 * Caller must hold fs_info->pinned_mutex (asserted below).  On unpin the
 * bytes are returned to the free space cache of groups that have already
 * been cached.  Returns 0.
 */
int btrfs_update_pinned_extents(struct btrfs_root *root,
				u64 bytenr, u64 num, int pin)
{
	u64 len;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *fs_info = root->fs_info;

	WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex));
	if (pin) {
		set_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	} else {
		clear_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	}
	/* walk group by group; clamp each step to the current group's end */
	while (num > 0) {
		cache = btrfs_lookup_block_group(fs_info, bytenr);
		BUG_ON(!cache);
		len = min(num, cache->key.offset -
			  (bytenr - cache->key.objectid));
		if (pin) {
			spin_lock(&cache->space_info->lock);
			spin_lock(&cache->lock);
			cache->pinned += len;
			cache->space_info->bytes_pinned += len;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			fs_info->total_pinned += len;
		} else {
			spin_lock(&cache->space_info->lock);
			spin_lock(&cache->lock);
			cache->pinned -= len;
			cache->space_info->bytes_pinned -= len;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			fs_info->total_pinned -= len;
			/* uncached groups pick this space up when cached */
			if (cache->cached)
				btrfs_add_free_space(cache, bytenr, len);
		}
		bytenr += len;
		num -= len;
	}
	return 0;
}
2061
2062 static int update_reserved_extents(struct btrfs_root *root,
2063                                    u64 bytenr, u64 num, int reserve)
2064 {
2065         u64 len;
2066         struct btrfs_block_group_cache *cache;
2067         struct btrfs_fs_info *fs_info = root->fs_info;
2068
2069         while (num > 0) {
2070                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2071                 BUG_ON(!cache);
2072                 len = min(num, cache->key.offset -
2073                           (bytenr - cache->key.objectid));
2074
2075                 spin_lock(&cache->space_info->lock);
2076                 spin_lock(&cache->lock);
2077                 if (reserve) {
2078                         cache->reserved += len;
2079                         cache->space_info->bytes_reserved += len;
2080                 } else {
2081                         cache->reserved -= len;
2082                         cache->space_info->bytes_reserved -= len;
2083                 }
2084                 spin_unlock(&cache->lock);
2085                 spin_unlock(&cache->space_info->lock);
2086                 bytenr += len;
2087                 num -= len;
2088         }
2089         return 0;
2090 }
2091
2092 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
2093 {
2094         u64 last = 0;
2095         u64 start;
2096         u64 end;
2097         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
2098         int ret;
2099
2100         mutex_lock(&root->fs_info->pinned_mutex);
2101         while(1) {
2102                 ret = find_first_extent_bit(pinned_extents, last,
2103                                             &start, &end, EXTENT_DIRTY);
2104                 if (ret)
2105                         break;
2106                 set_extent_dirty(copy, start, end, GFP_NOFS);
2107                 last = end + 1;
2108         }
2109         mutex_unlock(&root->fs_info->pinned_mutex);
2110         return 0;
2111 }
2112
/*
 * Unpin everything recorded in @unpin (the snapshot made by
 * btrfs_copy_pinned) at the end of a transaction commit.
 *
 * Drops pinned_mutex around cond_resched() so long unpin runs don't
 * stall other pinned-extent users.  Returns 0.
 */
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_io_tree *unpin)
{
	u64 start;
	u64 end;
	int ret;

	mutex_lock(&root->fs_info->pinned_mutex);
	while(1) {
		/* always restart from 0: each pass clears what it found */
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;
		btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		if (need_resched()) {
			mutex_unlock(&root->fs_info->pinned_mutex);
			cond_resched();
			mutex_lock(&root->fs_info->pinned_mutex);
		}
	}
	mutex_unlock(&root->fs_info->pinned_mutex);
	return 0;
}
2138
2139 static int finish_current_insert(struct btrfs_trans_handle *trans,
2140                                  struct btrfs_root *extent_root, int all)
2141 {
2142         u64 start;
2143         u64 end;
2144         u64 priv;
2145         u64 search = 0;
2146         u64 skipped = 0;
2147         struct btrfs_fs_info *info = extent_root->fs_info;
2148         struct btrfs_path *path;
2149         struct pending_extent_op *extent_op, *tmp;
2150         struct list_head insert_list, update_list;
2151         int ret;
2152         int num_inserts = 0, max_inserts;
2153
2154         path = btrfs_alloc_path();
2155         INIT_LIST_HEAD(&insert_list);
2156         INIT_LIST_HEAD(&update_list);
2157
2158         max_inserts = extent_root->leafsize /
2159                 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) +
2160                  sizeof(struct btrfs_extent_ref) +
2161                  sizeof(struct btrfs_extent_item));
2162 again:
2163         mutex_lock(&info->extent_ins_mutex);
2164         while (1) {
2165                 ret = find_first_extent_bit(&info->extent_ins, search, &start,
2166                                             &end, EXTENT_WRITEBACK);
2167                 if (ret) {
2168                         if (skipped && all && !num_inserts) {
2169                                 skipped = 0;
2170                                 search = 0;
2171                                 continue;
2172                         }
2173                         mutex_unlock(&info->extent_ins_mutex);
2174                         break;
2175                 }
2176
2177                 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
2178                 if (!ret) {
2179                         skipped = 1;
2180                         search = end + 1;
2181                         if (need_resched()) {
2182                                 mutex_unlock(&info->extent_ins_mutex);
2183                                 cond_resched();
2184                                 mutex_lock(&info->extent_ins_mutex);
2185                         }
2186                         continue;
2187                 }
2188
2189                 ret = get_state_private(&info->extent_ins, start, &priv);
2190                 BUG_ON(ret);
2191                 extent_op = (struct pending_extent_op *)(unsigned long) priv;
2192
2193                 if (extent_op->type == PENDING_EXTENT_INSERT) {
2194                         num_inserts++;
2195                         list_add_tail(&extent_op->list, &insert_list);
2196                         search = end + 1;
2197                         if (num_inserts == max_inserts) {
2198                                 mutex_unlock(&info->extent_ins_mutex);
2199                                 break;
2200                         }
2201                 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
2202                         list_add_tail(&extent_op->list, &update_list);
2203                         search = end + 1;
2204                 } else {
2205                         BUG();
2206                 }
2207         }
2208
2209         /*
2210          * process the update list, clear the writeback bit for it, and if
2211          * somebody marked this thing for deletion then just unlock it and be
2212          * done, the free_extents will handle it
2213          */
2214         mutex_lock(&info->extent_ins_mutex);
2215         list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
2216                 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2217                                   extent_op->bytenr + extent_op->num_bytes - 1,
2218                                   EXTENT_WRITEBACK, GFP_NOFS);
2219                 if (extent_op->del) {
2220                         list_del_init(&extent_op->list);
2221                         unlock_extent(&info->extent_ins, extent_op->bytenr,
2222                                       extent_op->bytenr + extent_op->num_bytes
2223                                       - 1, GFP_NOFS);
2224                         kfree(extent_op);
2225                 }
2226         }
2227         mutex_unlock(&info->extent_ins_mutex);
2228
2229         /*
2230          * still have things left on the update list, go ahead an update
2231          * everything
2232          */
2233         if (!list_empty(&update_list)) {
2234                 ret = update_backrefs(trans, extent_root, path, &update_list);
2235                 BUG_ON(ret);
2236         }
2237
2238         /*
2239          * if no inserts need to be done, but we skipped some extents and we
2240          * need to make sure everything is cleaned then reset everything and
2241          * go back to the beginning
2242          */
2243         if (!num_inserts && all && skipped) {
2244                 search = 0;
2245                 skipped = 0;
2246                 INIT_LIST_HEAD(&update_list);
2247                 INIT_LIST_HEAD(&insert_list);
2248                 goto again;
2249         } else if (!num_inserts) {
2250                 goto out;
2251         }
2252
2253         /*
2254          * process the insert extents list.  Again if we are deleting this
2255          * extent, then just unlock it, pin down the bytes if need be, and be
2256          * done with it.  Saves us from having to actually insert the extent
2257          * into the tree and then subsequently come along and delete it
2258          */
2259         mutex_lock(&info->extent_ins_mutex);
2260         list_for_each_entry_safe(extent_op, tmp, &insert_list, list) {
2261                 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2262                                   extent_op->bytenr + extent_op->num_bytes - 1,
2263                                   EXTENT_WRITEBACK, GFP_NOFS);
2264                 if (extent_op->del) {
2265                         list_del_init(&extent_op->list);
2266                         unlock_extent(&info->extent_ins, extent_op->bytenr,
2267                                       extent_op->bytenr + extent_op->num_bytes
2268                                       - 1, GFP_NOFS);
2269
2270                         mutex_lock(&extent_root->fs_info->pinned_mutex);
2271                         ret = pin_down_bytes(trans, extent_root,
2272                                              extent_op->bytenr,
2273                                              extent_op->num_bytes, 0);
2274                         mutex_unlock(&extent_root->fs_info->pinned_mutex);
2275
2276                         ret = update_block_group(trans, extent_root,
2277                                                  extent_op->bytenr,
2278                                                  extent_op->num_bytes,
2279                                                  0, ret > 0);
2280                         BUG_ON(ret);
2281                         kfree(extent_op);
2282                         num_inserts--;
2283                 }
2284         }
2285         mutex_unlock(&info->extent_ins_mutex);
2286
2287         ret = insert_extents(trans, extent_root, path, &insert_list,
2288                              num_inserts);
2289         BUG_ON(ret);
2290
2291         /*
2292          * if we broke out of the loop in order to insert stuff because we hit
2293          * the maximum number of inserts at a time we can handle, then loop
2294          * back and pick up where we left off
2295          */
2296         if (num_inserts == max_inserts) {
2297                 INIT_LIST_HEAD(&insert_list);
2298                 INIT_LIST_HEAD(&update_list);
2299                 num_inserts = 0;
2300                 goto again;
2301         }
2302
2303         /*
2304          * again, if we need to make absolutely sure there are no more pending
2305          * extent operations left and we know that we skipped some, go back to
2306          * the beginning and do it all again
2307          */
2308         if (all && skipped) {
2309                 INIT_LIST_HEAD(&insert_list);
2310                 INIT_LIST_HEAD(&update_list);
2311                 search = 0;
2312                 skipped = 0;
2313                 num_inserts = 0;
2314                 goto again;
2315         }
2316 out:
2317         btrfs_free_path(path);
2318         return 0;
2319 }
2320
2321 static int pin_down_bytes(struct btrfs_trans_handle *trans,
2322                           struct btrfs_root *root,
2323                           u64 bytenr, u64 num_bytes, int is_data)
2324 {
2325         int err = 0;
2326         struct extent_buffer *buf;
2327
2328         if (is_data)
2329                 goto pinit;
2330
2331         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
2332         if (!buf)
2333                 goto pinit;
2334
2335         /* we can reuse a block if it hasn't been written
2336          * and it is from this transaction.  We can't
2337          * reuse anything from the tree log root because
2338          * it has tiny sub-transactions.
2339          */
2340         if (btrfs_buffer_uptodate(buf, 0) &&
2341             btrfs_try_tree_lock(buf)) {
2342                 u64 header_owner = btrfs_header_owner(buf);
2343                 u64 header_transid = btrfs_header_generation(buf);
2344                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2345                     header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2346                     header_transid == trans->transid &&
2347                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2348                         clean_tree_block(NULL, root, buf);
2349                         btrfs_tree_unlock(buf);
2350                         free_extent_buffer(buf);
2351                         return 1;
2352                 }
2353                 btrfs_tree_unlock(buf);
2354         }
2355         free_extent_buffer(buf);
2356 pinit:
2357         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2358
2359         BUG_ON(err < 0);
2360         return 0;
2361 }
2362
/*
 * remove an extent from the root, returns 0 on success
 *
 * Drops one back reference (identified by parent/root_objectid/
 * ref_generation/owner_objectid) from the extent at
 * [bytenr, bytenr + num_bytes).  When the extent item's refcount reaches
 * zero the extent item itself is deleted, superblock and root-item byte
 * accounting are reduced, the block group is updated, and the range is
 * either pinned (pin != 0) or marked free for reuse (mark_free).
 */
static int __free_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 ref_generation,
			 u64 owner_objectid, int pin, int mark_free)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	int ret;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	struct btrfs_extent_item *ei;
	u32 refs;

	key.objectid = bytenr;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	key.offset = num_bytes;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	ret = lookup_extent_backref(trans, extent_root, path,
				    bytenr, parent, root_objectid,
				    ref_generation, owner_objectid, 1);
	if (ret == 0) {
		struct btrfs_key found_key;
		extent_slot = path->slots[0];
		/*
		 * The extent item usually sits shortly before its backref
		 * in the same leaf; scan backwards a few slots looking for
		 * it so both items can be removed in one btrfs_del_items
		 * call below.
		 */
		while(extent_slot > 0) {
			extent_slot--;
			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
					      extent_slot);
			if (found_key.objectid != bytenr)
				break;
			if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
			    found_key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			/* give up after scanning back 5 slots */
			if (path->slots[0] - extent_slot > 5)
				break;
		}
		if (!found_extent) {
			/*
			 * extent item is not nearby: drop the backref now
			 * and re-search so the path points at the extent
			 * item itself.
			 */
			ret = remove_extent_backref(trans, extent_root, path);
			BUG_ON(ret);
			btrfs_release_path(extent_root, path);
			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret) {
				printk(KERN_ERR "umm, got %d back from search"
				       ", was looking for %Lu\n", ret,
				       bytenr);
				btrfs_print_leaf(extent_root, path->nodes[0]);
			}
			BUG_ON(ret);
			extent_slot = path->slots[0];
		}
	} else {
		btrfs_print_leaf(extent_root, path->nodes[0]);
		WARN_ON(1);
		printk("Unable to find ref byte nr %Lu root %Lu "
		       "gen %Lu owner %Lu\n", bytenr,
		       root_objectid, ref_generation, owner_objectid);
	}

	/* drop one reference from the extent item */
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs == 0);
	refs -= 1;
	btrfs_set_extent_refs(leaf, ei, refs);

	btrfs_mark_buffer_dirty(leaf);

	if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
		struct btrfs_extent_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
		/* if the back ref and the extent are next to each other
		 * they get deleted below in one shot
		 */
		path->slots[0] = extent_slot;
		num_to_del = 2;
	} else if (found_extent) {
		/* otherwise delete the extent back ref */
		ret = remove_extent_backref(trans, extent_root, path);
		BUG_ON(ret);
		/* if refs are 0, we need to setup the path for deletion */
		if (refs == 0) {
			btrfs_release_path(extent_root, path);
			ret = btrfs_search_slot(trans, extent_root, &key, path,
						-1, 1);
			BUG_ON(ret);
		}
	}

	/* last reference gone: remove the extent item and update accounting */
	if (refs == 0) {
		u64 super_used;
		u64 root_used;
#ifdef BIO_RW_DISCARD
		u64 map_length = num_bytes;
		struct btrfs_multi_bio *multi = NULL;
#endif

		if (pin) {
			/*
			 * pin_down_bytes returns 1 when a metadata block
			 * from this transaction could be reused instead of
			 * pinned; in that case the range is marked free.
			 */
			mutex_lock(&root->fs_info->pinned_mutex);
			ret = pin_down_bytes(trans, root, bytenr, num_bytes,
				owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
			mutex_unlock(&root->fs_info->pinned_mutex);
			if (ret > 0)
				mark_free = 1;
			BUG_ON(ret < 0);
		}

		/* block accounting for super block */
		spin_lock_irq(&info->delalloc_lock);
		super_used = btrfs_super_bytes_used(&info->super_copy);
		btrfs_set_super_bytes_used(&info->super_copy,
					   super_used - num_bytes);
		spin_unlock_irq(&info->delalloc_lock);

		/* block accounting for root item */
		root_used = btrfs_root_used(&root->root_item);
		btrfs_set_root_used(&root->root_item,
					   root_used - num_bytes);
		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		BUG_ON(ret);
		btrfs_release_path(extent_root, path);
		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
					 mark_free);
		BUG_ON(ret);

#ifdef BIO_RW_DISCARD
		/* Tell the block device(s) that the sectors can be discarded */
		ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
				      bytenr, &map_length, &multi, 0);
		if (!ret) {
			struct btrfs_bio_stripe *stripe = multi->stripes;
			int i;

			if (map_length > num_bytes)
				map_length = num_bytes;

			for (i = 0; i < multi->num_stripes; i++, stripe++) {
				btrfs_issue_discard(stripe->dev->bdev,
						    stripe->physical,
						     map_length);
			}
			kfree(multi);
		}
#endif
	}
	btrfs_free_path(path);
	finish_current_insert(trans, extent_root, 0);
	return ret;
}
2529
/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 *
 * Walks fs_info->pending_del for ranges tagged EXTENT_WRITEBACK,
 * collects the corresponding pending_extent_op records onto a local list
 * and frees them via free_extents().  If a matching pending insert exists
 * for the same range in fs_info->extent_ins, the insert/delete pair
 * cancels out: both ops are dropped and the bytes are pinned and removed
 * from block group accounting directly (no tree modification needed).
 *
 * Ranges whose extent_ins lock cannot be taken are skipped; when 'all' is
 * set the whole scan restarts until no skipped ranges remain.
 */
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root, int all)
{
	int ret;
	int err = 0;
	u64 start;
	u64 end;
	u64 priv;   /* holds a pending_extent_op pointer stashed in the tree */
	u64 search = 0;
	int nr = 0, skipped = 0;
	struct extent_io_tree *pending_del;
	struct extent_io_tree *extent_ins;
	struct pending_extent_op *extent_op;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct list_head delete_list;

	INIT_LIST_HEAD(&delete_list);
	extent_ins = &extent_root->fs_info->extent_ins;
	pending_del = &extent_root->fs_info->pending_del;

again:
	mutex_lock(&info->extent_ins_mutex);
	while(1) {
		ret = find_first_extent_bit(pending_del, search, &start, &end,
					    EXTENT_WRITEBACK);
		if (ret) {
			/* nothing found past 'search'; rescan from 0 if we
			 * must process everything and skipped some ranges
			 * without queueing any work yet
			 */
			if (all && skipped && !nr) {
				search = 0;
				continue;
			}
			mutex_unlock(&info->extent_ins_mutex);
			break;
		}

		/* someone else holds this range locked: remember we skipped
		 * it and move on
		 */
		ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
		if (!ret) {
			search = end+1;
			skipped = 1;

			if (need_resched()) {
				mutex_unlock(&info->extent_ins_mutex);
				cond_resched();
				mutex_lock(&info->extent_ins_mutex);
			}

			continue;
		}
		BUG_ON(ret < 0);

		ret = get_state_private(pending_del, start, &priv);
		BUG_ON(ret);
		extent_op = (struct pending_extent_op *)(unsigned long)priv;

		clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK,
				  GFP_NOFS);
		if (!test_range_bit(extent_ins, start, end,
				    EXTENT_WRITEBACK, 0)) {
			/* no pending insert for this range: queue the delete */
			list_add_tail(&extent_op->list, &delete_list);
			nr++;
		} else {
			/* a pending insert exists for the same range: the
			 * insert and delete cancel each other out
			 */
			kfree(extent_op);

			ret = get_state_private(&info->extent_ins, start,
						&priv);
			BUG_ON(ret);
			extent_op = (struct pending_extent_op *)
						(unsigned long)priv;

			clear_extent_bits(&info->extent_ins, start, end,
					  EXTENT_WRITEBACK, GFP_NOFS);

			/* backref updates still need to go through
			 * free_extents, so queue those
			 */
			if (extent_op->type == PENDING_BACKREF_UPDATE) {
				list_add_tail(&extent_op->list, &delete_list);
				search = end + 1;
				nr++;
				continue;
			}

			/* plain insert+delete pair: just pin the bytes and
			 * fix up the block group accounting here
			 */
			mutex_lock(&extent_root->fs_info->pinned_mutex);
			ret = pin_down_bytes(trans, extent_root, start,
					     end + 1 - start, 0);
			mutex_unlock(&extent_root->fs_info->pinned_mutex);

			ret = update_block_group(trans, extent_root, start,
						end + 1 - start, 0, ret > 0);

			unlock_extent(extent_ins, start, end, GFP_NOFS);
			BUG_ON(ret);
			kfree(extent_op);
		}
		if (ret)
			err = ret;

		search = end + 1;

		if (need_resched()) {
			mutex_unlock(&info->extent_ins_mutex);
			cond_resched();
			mutex_lock(&info->extent_ins_mutex);
		}
	}

	if (nr) {
		ret = free_extents(trans, extent_root, &delete_list);
		BUG_ON(ret);
	}

	/* when 'all' is set, keep going until nothing was skipped */
	if (all && skipped) {
		INIT_LIST_HEAD(&delete_list);
		search = 0;
		nr = 0;
		goto again;
	}

	return err;
}
2650
/*
 * remove an extent from the root, returns 0 on success
 *
 * Frees against the extent tree itself cannot be processed while we are
 * already modifying that tree, so they are recorded as a
 * PENDING_EXTENT_DELETE in fs_info->pending_del and handled later by
 * del_pending_extents().  All other roots go through __free_extent()
 * directly, followed by a pass over the pending insert/delete lists.
 */
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 root_objectid, u64 ref_generation,
			       u64 owner_objectid, int pin)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	int pending_ret;
	int ret;

	WARN_ON(num_bytes < root->sectorsize);
	if (root == extent_root) {
		struct pending_extent_op *extent_op = NULL;

		mutex_lock(&root->fs_info->extent_ins_mutex);
		if (test_range_bit(&root->fs_info->extent_ins, bytenr,
				bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
			/* there is already a pending op for this range */
			u64 priv;
			ret = get_state_private(&root->fs_info->extent_ins,
						bytenr, &priv);
			BUG_ON(ret);
			extent_op = (struct pending_extent_op *)
						(unsigned long)priv;

			extent_op->del = 1;
			/* a pending insert that never hit the tree simply
			 * gets cancelled by the del flag; nothing to record
			 */
			if (extent_op->type == PENDING_EXTENT_INSERT) {
				mutex_unlock(&root->fs_info->extent_ins_mutex);
				return 0;
			}
		}

		if (extent_op) {
			/* record the delete against the values from before
			 * the still-pending update
			 */
			ref_generation = extent_op->orig_generation;
			parent = extent_op->orig_parent;
		}

		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
		BUG_ON(!extent_op);

		extent_op->type = PENDING_EXTENT_DELETE;
		extent_op->bytenr = bytenr;
		extent_op->num_bytes = num_bytes;
		extent_op->parent = parent;
		extent_op->orig_parent = parent;
		extent_op->generation = ref_generation;
		extent_op->orig_generation = ref_generation;
		/* for extent tree blocks owner_objectid is the level */
		extent_op->level = (int)owner_objectid;
		INIT_LIST_HEAD(&extent_op->list);
		extent_op->del = 0;

		/* stash the op in the pending_del tree, tagged with
		 * EXTENT_WRITEBACK so del_pending_extents can find it
		 */
		set_extent_bits(&root->fs_info->pending_del,
				bytenr, bytenr + num_bytes - 1,
				EXTENT_WRITEBACK, GFP_NOFS);
		set_state_private(&root->fs_info->pending_del,
				  bytenr, (unsigned long)extent_op);
		mutex_unlock(&root->fs_info->extent_ins_mutex);
		return 0;
	}
	/* if metadata always pin */
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
		if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
			struct btrfs_block_group_cache *cache;

			/* btrfs_free_reserved_extent */
			cache = btrfs_lookup_block_group(root->fs_info, bytenr);
			BUG_ON(!cache);
			btrfs_add_free_space(cache, bytenr, num_bytes);
			update_reserved_extents(root, bytenr, num_bytes, 0);
			return 0;
		}
		pin = 1;
	}

	/* if data pin when any transaction has committed this */
	if (ref_generation != trans->transid)
		pin = 1;

	ret = __free_extent(trans, root, bytenr, num_bytes, parent,
			    root_objectid, ref_generation,
			    owner_objectid, pin, pin == 0);

	finish_current_insert(trans, root->fs_info->extent_root, 0);
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0);
	return ret ? ret : pending_ret;
}
2739
2740 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2741                       struct btrfs_root *root,
2742                       u64 bytenr, u64 num_bytes, u64 parent,
2743                       u64 root_objectid, u64 ref_generation,
2744                       u64 owner_objectid, int pin)
2745 {
2746         int ret;
2747
2748         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
2749                                   root_objectid, ref_generation,
2750                                   owner_objectid, pin);
2751         return ret;
2752 }
2753
2754 static u64 stripe_align(struct btrfs_root *root, u64 val)
2755 {
2756         u64 mask = ((u64)root->stripesize - 1);
2757         u64 ret = (val + mask) & ~mask;
2758         return ret;
2759 }
2760
2761 /*
2762  * walks the btree of allocated extents and find a hole of a given size.
2763  * The key ins is changed to record the hole:
2764  * ins->objectid == block start
2765  * ins->flags = BTRFS_EXTENT_ITEM_KEY
2766  * ins->offset == number of blocks
2767  * Any available blocks before search_start are skipped.
2768  */
2769 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2770                                      struct btrfs_root *orig_root,
2771                                      u64 num_bytes, u64 empty_size,
2772                                      u64 search_start, u64 search_end,
2773                                      u64 hint_byte, struct btrfs_key *ins,
2774                                      u64 exclude_start, u64 exclude_nr,
2775                                      int data)
2776 {
2777         int ret = 0;
2778         struct btrfs_root * root = orig_root->fs_info->extent_root;
2779         u64 total_needed = num_bytes;
2780         u64 *last_ptr = NULL;
2781         u64 last_wanted = 0;
2782         struct btrfs_block_group_cache *block_group = NULL;
2783         int chunk_alloc_done = 0;
2784         int empty_cluster = 2 * 1024 * 1024;
2785         int allowed_chunk_alloc = 0;
2786         struct list_head *head = NULL, *cur = NULL;
2787         int loop = 0;
2788         int extra_loop = 0;
2789         struct btrfs_space_info *space_info;
2790
2791         WARN_ON(num_bytes < root->sectorsize);
2792         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2793         ins->objectid = 0;
2794         ins->offset = 0;
2795
2796         if (orig_root->ref_cows || empty_size)
2797                 allowed_chunk_alloc = 1;
2798
2799         if (data & BTRFS_BLOCK_GROUP_METADATA) {
2800                 last_ptr = &root->fs_info->last_alloc;
2801                 empty_cluster = 64 * 1024;
2802         }
2803
2804         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
2805                 last_ptr = &root->fs_info->last_data_alloc;
2806
2807         if (last_ptr) {
2808                 if (*last_ptr) {
2809                         hint_byte = *last_ptr;
2810                         last_wanted = *last_ptr;
2811                 } else
2812                         empty_size += empty_cluster;
2813         } else {
2814                 empty_cluster = 0;
2815         }
2816         search_start = max(search_start, first_logical_byte(root, 0));
2817         search_start = max(search_start, hint_byte);
2818
2819         if (last_wanted && search_start != last_wanted) {
2820                 last_wanted = 0;
2821                 empty_size += empty_cluster;
2822         }
2823
2824         total_needed += empty_size;
2825         block_group = btrfs_lookup_block_group(root->fs_info, search_start);
2826         if (!block_group)
2827                 block_group = btrfs_lookup_first_block_group(root->fs_info,
2828                                                              search_start);
2829         space_info = __find_space_info(root->fs_info, data);
2830
2831         down_read(&space_info->groups_sem);
2832         while (1) {
2833                 struct btrfs_free_space *free_space;
2834                 /*
2835                  * the only way this happens if our hint points to a block
2836                  * group thats not of the proper type, while looping this
2837                  * should never happen
2838                  */
2839                 if (empty_size)
2840                         extra_loop = 1;
2841
2842                 if (!block_group)
2843                         goto new_group_no_lock;
2844
2845                 mutex_lock(&block_group->alloc_mutex);
2846                 if (unlikely(!block_group_bits(block_group, data)))
2847                         goto new_group;
2848
2849                 ret = cache_block_group(root, block_group);
2850                 if (ret) {
2851                         mutex_unlock(&block_group->alloc_mutex);
2852                         break;
2853                 }
2854
2855                 if (block_group->ro)
2856                         goto new_group;
2857
2858                 free_space = btrfs_find_free_space(block_group, search_start,
2859                                                    total_needed);
2860                 if (free_space) {
2861                         u64 start = block_group->key.objectid;
2862                         u64 end = block_group->key.objectid +
2863                                 block_group->key.offset;
2864
2865                         search_start = stripe_align(root, free_space->offset);
2866
2867                         /* move on to the next group */
2868                         if (search_start + num_bytes >= search_end)
2869                                 goto new_group;
2870
2871                         /* move on to the next group */
2872                         if (search_start + num_bytes > end)
2873                                 goto new_group;
2874
2875                         if (last_wanted && search_start != last_wanted) {
2876                                 total_needed += empty_cluster;
2877                                 empty_size += empty_cluster;
2878                                 last_wanted = 0;
2879                                 /*
2880                                  * if search_start is still in this block group
2881                                  * then we just re-search this block group
2882                                  */
2883                                 if (search_start >= start &&
2884                                     search_start < end) {
2885                                         mutex_unlock(&block_group->alloc_mutex);
2886                                         continue;
2887                                 }
2888
2889                                 /* else we go to the next block group */
2890                                 goto new_group;
2891                         }
2892
2893                         if (exclude_nr > 0 &&
2894                             (search_start + num_bytes > exclude_start &&
2895                              search_start < exclude_start + exclude_nr)) {
2896                                 search_start = exclude_start + exclude_nr;
2897                                 /*
2898                                  * if search_start is still in this block group
2899                                  * then we just re-search this block group
2900                                  */
2901                                 if (search_start >= start &&
2902                                     search_start < end) {
2903                                         mutex_unlock(&block_group->alloc_mutex);
2904                                         last_wanted = 0;
2905                                         continue;
2906                                 }
2907
2908                                 /* else we go to the next block group */
2909                                 goto new_group;
2910                         }
2911
2912                         ins->objectid = search_start;
2913                         ins->offset = num_bytes;
2914
2915                         btrfs_remove_free_space_lock(block_group, search_start,
2916                                                      num_bytes);
2917                         /* we are all good, lets return */
2918                         mutex_unlock(&block_group->alloc_mutex);
2919                         break;
2920                 }
2921 new_group:
2922                 mutex_unlock(&block_group->alloc_mutex);
2923 new_group_no_lock:
2924                 /* don't try to compare new allocations against the
2925                  * last allocation any more
2926                  */
2927                 last_wanted = 0;
2928
2929                 /*
2930                  * Here's how this works.
2931                  * loop == 0: we were searching a block group via a hint
2932                  *              and didn't find anything, so we start at
2933                  *              the head of the block groups and keep searching
2934                  * loop == 1: we're searching through all of the block groups
2935                  *              if we hit the head again we have searched
2936                  *              all of the block groups for this space and we
2937                  *              need to try and allocate, if we cant error out.
2938                  * loop == 2: we allocated more space and are looping through
2939                  *              all of the block groups again.
2940                  */
2941                 if (loop == 0) {
2942                         head = &space_info->block_groups;
2943                         cur = head->next;
2944                         loop++;
2945                 } else if (loop == 1 && cur == head) {
2946                         int keep_going;
2947
2948                         /* at this point we give up on the empty_size
2949                          * allocations and just try to allocate the min
2950                          * space.
2951                          *
2952                          * The extra_loop field was set if an empty_size
2953                          * allocation was attempted above, and if this
2954                          * is try we need to try the loop again without
2955                          * the additional empty_size.
2956                          */
2957                         total_needed -= empty_size;
2958                         empty_size = 0;
2959                         keep_going = extra_loop;
2960                         loop++;
2961
2962                         if (allowed_chunk_alloc && !chunk_alloc_done) {
2963                                 up_read(&space_info->groups_sem);
2964                                 ret = do_chunk_alloc(trans, root, num_bytes +
2965                                                      2 * 1024 * 1024, data, 1);
2966                                 down_read(&space_info->groups_sem);
2967                                 if (ret < 0)
2968                                         goto loop_check;
2969                                 head = &space_info->block_groups;
2970                                 /*
2971                                  * we've allocated a new chunk, keep
2972                                  * trying
2973                                  */
2974                                 keep_going = 1;
2975                                 chunk_alloc_done = 1;
2976                         } else if (!allowed_chunk_alloc) {
2977                                 space_info->force_alloc = 1;
2978                         }
2979 loop_check:
2980                         if (keep_going) {
2981                                 cur = head->next;
2982                                 extra_loop = 0;
2983                         } else {
2984                                 break;
2985                         }
2986                 } else if (cur == head) {
2987                         break;
2988                 }
2989
2990                 block_group = list_entry(cur, struct btrfs_block_group_cache,
2991                                          list);
2992                 search_start = block_group->key.objectid;
2993                 cur = cur->next;
2994         }
2995
2996         /* we found what we needed */
2997         if (ins->objectid) {
2998                 if (!(data & BTRFS_BLOCK_GROUP_DATA))
2999                         trans->block_group = block_group;
3000
3001                 if (last_ptr)
3002                         *last_ptr = ins->objectid + ins->offset;
3003                 ret = 0;
3004         } else if (!ret) {
3005                 printk(KERN_ERR "we were searching for %Lu bytes, num_bytes %Lu,"
3006                        " loop %d, allowed_alloc %d\n", total_needed, num_bytes,
3007                        loop, allowed_chunk_alloc);
3008                 ret = -ENOSPC;
3009         }
3010
3011         up_read(&space_info->groups_sem);
3012         return ret;
3013 }
3014
3015 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
3016 {
3017         struct btrfs_block_group_cache *cache;
3018         struct list_head *l;
3019
3020         printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
3021                info->total_bytes - info->bytes_used - info->bytes_pinned -
3022                info->bytes_reserved, (info->full) ? "" : "not ");
3023
3024         down_read(&info->groups_sem);
3025         list_for_each(l, &info->block_groups) {
3026                 cache = list_entry(l, struct btrfs_block_group_cache, list);
3027                 spin_lock(&cache->lock);
3028                 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
3029                        "%Lu pinned %Lu reserved\n",
3030                        cache->key.objectid, cache->key.offset,
3031                        btrfs_block_group_used(&cache->item),
3032                        cache->pinned, cache->reserved);
3033                 btrfs_dump_free_space(cache, bytes);
3034                 spin_unlock(&cache->lock);
3035         }
3036         up_read(&info->groups_sem);
3037 }
3038
3039 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3040                                   struct btrfs_root *root,
3041                                   u64 num_bytes, u64 min_alloc_size,
3042                                   u64 empty_size, u64 hint_byte,
3043                                   u64 search_end, struct btrfs_key *ins,
3044                                   u64 data)
3045 {
3046         int ret;
3047         u64 search_start = 0;
3048         u64 alloc_profile;
3049         struct btrfs_fs_info *info = root->fs_info;
3050
3051         if (data) {
3052                 alloc_profile = info->avail_data_alloc_bits &
3053                                 info->data_alloc_profile;
3054                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
3055         } else if (root == root->fs_info->chunk_root) {
3056                 alloc_profile = info->avail_system_alloc_bits &
3057                                 info->system_alloc_profile;
3058                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
3059         } else {
3060                 alloc_profile = info->avail_metadata_alloc_bits &
3061                                 info->metadata_alloc_profile;
3062                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
3063         }
3064 again:
3065         data = btrfs_reduce_alloc_profile(root, data);
3066         /*
3067          * the only place that sets empty_size is btrfs_realloc_node, which
3068          * is not called recursively on allocations
3069          */
3070         if (empty_size || root->ref_cows) {
3071                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
3072                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3073                                      2 * 1024 * 1024,
3074                                      BTRFS_BLOCK_GROUP_METADATA |
3075                                      (info->metadata_alloc_profile &
3076                                       info->avail_metadata_alloc_bits), 0);
3077                 }
3078                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3079                                      num_bytes + 2 * 1024 * 1024, data, 0);
3080         }
3081
3082         WARN_ON(num_bytes < root->sectorsize);
3083         ret = find_free_extent(trans, root, num_bytes, empty_size,
3084                                search_start, search_end, hint_byte, ins,
3085                                trans->alloc_exclude_start,
3086                                trans->alloc_exclude_nr, data);
3087
3088         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
3089                 num_bytes = num_bytes >> 1;
3090                 num_bytes = num_bytes & ~(root->sectorsize - 1);
3091                 num_bytes = max(num_bytes, min_alloc_size);
3092                 do_chunk_alloc(trans, root->fs_info->extent_root,
3093                                num_bytes, data, 1);
3094                 goto again;
3095         }
3096         if (ret) {
3097                 struct btrfs_space_info *sinfo;
3098
3099                 sinfo = __find_space_info(root->fs_info, data);
3100                 printk("allocation failed flags %Lu, wanted %Lu\n",
3101                        data, num_bytes);
3102                 dump_space_info(sinfo, num_bytes);
3103                 BUG();
3104         }
3105
3106         return ret;
3107 }
3108
3109 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
3110 {
3111         struct btrfs_block_group_cache *cache;
3112
3113         cache = btrfs_lookup_block_group(root->fs_info, start);
3114         if (!cache) {
3115                 printk(KERN_ERR "Unable to find block group for %Lu\n", start);
3116                 return -ENOSPC;
3117         }
3118         btrfs_add_free_space(cache, start, len);
3119         update_reserved_extents(root, start, len, 0);
3120         return 0;
3121 }
3122
3123 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3124                                   struct btrfs_root *root,
3125                                   u64 num_bytes, u64 min_alloc_size,
3126                                   u64 empty_size, u64 hint_byte,
3127                                   u64 search_end, struct btrfs_key *ins,
3128                                   u64 data)
3129 {
3130         int ret;
3131         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
3132                                      empty_size, hint_byte, search_end, ins,
3133                                      data);
3134         update_reserved_extents(root, ins->objectid, ins->offset, 1);
3135         return ret;
3136 }
3137
/*
 * Insert the extent item and its first backref for an extent previously
 * reserved via __btrfs_reserve_extent().  @ins carries the reserved
 * (objectid, offset) pair; when @parent is zero the extent records itself
 * as its own parent.
 *
 * Allocations on behalf of the extent root cannot insert into the extent
 * tree directly (that would recurse), so those are queued as pending
 * insert operations instead and picked up later (see
 * finish_current_insert()).
 */
static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root, u64 parent,
					 u64 root_objectid, u64 ref_generation,
					 u64 owner, struct btrfs_key *ins)
{
	int ret;
	int pending_ret;
	u64 super_used;
	u64 root_used;
	u64 num_bytes = ins->offset;
	u32 sizes[2];
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_ref *ref;
	struct btrfs_path *path;
	struct btrfs_key keys[2];

	if (parent == 0)
		parent = ins->objectid;

	/* block accounting for super block */
	spin_lock_irq(&info->delalloc_lock);
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
	spin_unlock_irq(&info->delalloc_lock);

	/* block accounting for root item */
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);

	if (root == extent_root) {
		/* queue a pending insert instead of recursing into the
		 * extent tree */
		struct pending_extent_op *extent_op;

		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
		BUG_ON(!extent_op);

		extent_op->type = PENDING_EXTENT_INSERT;
		extent_op->bytenr = ins->objectid;
		extent_op->num_bytes = ins->offset;
		extent_op->parent = parent;
		extent_op->orig_parent = 0;
		extent_op->generation = ref_generation;
		extent_op->orig_generation = 0;
		extent_op->level = (int)owner;
		INIT_LIST_HEAD(&extent_op->list);
		extent_op->del = 0;

		/* mark the range and stash the op keyed by its start bytenr */
		mutex_lock(&root->fs_info->extent_ins_mutex);
		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_WRITEBACK, GFP_NOFS);
		set_state_private(&root->fs_info->extent_ins,
				  ins->objectid, (unsigned long)extent_op);
		mutex_unlock(&root->fs_info->extent_ins_mutex);
		goto update_block;
	}

	/* insert the extent item and its backref in one batch:
	 * keys[0]/sizes[0] describe the extent item, keys[1]/sizes[1]
	 * the ref keyed by @parent */
	memcpy(&keys[0], ins, sizeof(*ins));
	keys[1].objectid = ins->objectid;
	keys[1].type = BTRFS_EXTENT_REF_KEY;
	keys[1].offset = parent;
	sizes[0] = sizeof(*extent_item);
	sizes[1] = sizeof(*ref);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
				       sizes, 2);
	BUG_ON(ret);

	extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
			     struct btrfs_extent_ref);

	btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
	btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
	btrfs_set_ref_objectid(path->nodes[0], ref, owner);
	btrfs_set_ref_num_refs(path->nodes[0], ref, 1);

	btrfs_mark_buffer_dirty(path->nodes[0]);

	/* the new extent item exists now; drop the allocation exclusion */
	trans->alloc_exclude_start = 0;
	trans->alloc_exclude_nr = 0;
	btrfs_free_path(path);
	finish_current_insert(trans, extent_root, 0);
	pending_ret = del_pending_extents(trans, extent_root, 0);

	if (ret)
		goto out;
	if (pending_ret) {
		ret = pending_ret;
		goto out;
	}

update_block:
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
	if (ret) {
		printk("update block group failed for %Lu %Lu\n",
		       ins->objectid, ins->offset);
		BUG();
	}
out:
	return ret;
}
3246
3247 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3248                                 struct btrfs_root *root, u64 parent,
3249                                 u64 root_objectid, u64 ref_generation,
3250                                 u64 owner, struct btrfs_key *ins)
3251 {
3252         int ret;
3253
3254         if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
3255                 return 0;
3256         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
3257                                             ref_generation, owner, ins);
3258         update_reserved_extents(root, ins->objectid, ins->offset, 0);
3259         return ret;
3260 }
3261
3262 /*
3263  * this is used by the tree logging recovery code.  It records that
3264  * an extent has been allocated and makes sure to clear the free
3265  * space cache bits as well
3266  */
3267 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3268                                 struct btrfs_root *root, u64 parent,
3269                                 u64 root_objectid, u64 ref_generation,
3270                                 u64 owner, struct btrfs_key *ins)
3271 {
3272         int ret;
3273         struct btrfs_block_group_cache *block_group;
3274
3275         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
3276         mutex_lock(&block_group->alloc_mutex);
3277         cache_block_group(root, block_group);
3278
3279         ret = btrfs_remove_free_space_lock(block_group, ins->objectid,
3280                                            ins->offset);
3281         mutex_unlock(&block_group->alloc_mutex);
3282         BUG_ON(ret);
3283         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
3284                                             ref_generation, owner, ins);
3285         return ret;
3286 }
3287
3288 /*
3289  * finds a free extent and does all the dirty work required for allocation
3290  * returns the key for the extent through ins, and a tree buffer for
3291  * the first block of the extent through buf.
3292  *
3293  * returns 0 if everything worked, non-zero otherwise.
3294  */
3295 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
3296                        struct btrfs_root *root,
3297                        u64 num_bytes, u64 parent, u64 min_alloc_size,
3298                        u64 root_objectid, u64 ref_generation,
3299                        u64 owner_objectid, u64 empty_size, u64 hint_byte,
3300                        u64 search_end, struct btrfs_key *ins, u64 data)
3301 {
3302         int ret;
3303
3304         ret = __btrfs_reserve_extent(trans, root, num_bytes,
3305                                      min_alloc_size, empty_size, hint_byte,
3306                                      search_end, ins, data);
3307         BUG_ON(ret);
3308         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
3309                 ret = __btrfs_alloc_reserved_extent(trans, root, parent,
3310                                         root_objectid, ref_generation,
3311                                         owner_objectid, ins);
3312                 BUG_ON(ret);
3313
3314         } else {
3315                 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3316         }
3317         return ret;
3318 }
3319
3320 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
3321                                             struct btrfs_root *root,
3322                                             u64 bytenr, u32 blocksize)
3323 {
3324         struct extent_buffer *buf;
3325
3326         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
3327         if (!buf)
3328                 return ERR_PTR(-ENOMEM);
3329         btrfs_set_header_generation(buf, trans->transid);
3330         btrfs_tree_lock(buf);
3331         clean_tree_block(trans, root, buf);
3332         btrfs_set_buffer_uptodate(buf);
3333         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
3334                 set_extent_dirty(&root->dirty_log_pages, buf->start,
3335                          buf->start + buf->len - 1, GFP_NOFS);
3336         } else {
3337                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
3338                          buf->start + buf->len - 1, GFP_NOFS);
3339         }
3340         trans->blocks_used++;
3341         return buf;
3342 }
3343
3344 /*
3345  * helper function to allocate a block for a given tree
3346  * returns the tree buffer or NULL.
3347  */
3348 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3349                                              struct btrfs_root *root,
3350                                              u32 blocksize, u64 parent,
3351                                              u64 root_objectid,
3352                                              u64 ref_generation,
3353                                              int level,
3354                                              u64 hint,
3355                                              u64 empty_size)
3356 {
3357         struct btrfs_key ins;
3358         int ret;
3359         struct extent_buffer *buf;
3360
3361         ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
3362                                  root_objectid, ref_generation, level,
3363                                  empty_size, hint, (u64)-1, &ins, 0);
3364         if (ret) {
3365                 BUG_ON(ret > 0);
3366                 return ERR_PTR(ret);
3367         }
3368
3369         buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
3370         return buf;
3371 }
3372
3373 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3374                         struct btrfs_root *root, struct extent_buffer *leaf)
3375 {
3376         u64 leaf_owner;
3377         u64 leaf_generation;
3378         struct btrfs_key key;
3379         struct btrfs_file_extent_item *fi;
3380         int i;
3381         int nritems;
3382         int ret;
3383
3384         BUG_ON(!btrfs_is_leaf(leaf));
3385         nritems = btrfs_header_nritems(leaf);
3386         leaf_owner = btrfs_header_owner(leaf);
3387         leaf_generation = btrfs_header_generation(leaf);
3388
3389         for (i = 0; i < nritems; i++) {
3390                 u64 disk_bytenr;
3391                 cond_resched();
3392
3393                 btrfs_item_key_to_cpu(leaf, &key, i);
3394                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3395                         continue;
3396                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3397                 if (btrfs_file_extent_type(leaf, fi) ==
3398                     BTRFS_FILE_EXTENT_INLINE)
3399                         continue;
3400                 /*
3401                  * FIXME make sure to insert a trans record that
3402                  * repeats the snapshot del on crash
3403                  */
3404                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
3405                 if (disk_bytenr == 0)
3406                         continue;
3407
3408                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
3409                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
3410                                 leaf->start, leaf_owner, leaf_generation,
3411                                 key.objectid, 0);
3412                 BUG_ON(ret);
3413
3414                 atomic_inc(&root->fs_info->throttle_gen);
3415                 wake_up(&root->fs_info->transaction_throttle);
3416                 cond_resched();
3417         }
3418         return 0;
3419 }
3420
3421 static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3422                                         struct btrfs_root *root,
3423                                         struct btrfs_leaf_ref *ref)
3424 {
3425         int i;
3426         int ret;
3427         struct btrfs_extent_info *info = ref->extents;
3428
3429         for (i = 0; i < ref->nritems; i++) {
3430                 ret = __btrfs_free_extent(trans, root, info->bytenr,
3431                                           info->num_bytes, ref->bytenr,
3432                                           ref->owner, ref->generation,
3433                                           info->objectid, 0);
3434
3435                 atomic_inc(&root->fs_info->throttle_gen);
3436                 wake_up(&root->fs_info->transaction_throttle);
3437                 cond_resched();
3438
3439                 BUG_ON(ret);
3440                 info++;
3441         }
3442
3443         return 0;
3444 }
3445
3446 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
3447                               u32 *refs)
3448 {
3449         int ret;
3450
3451         ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
3452         BUG_ON(ret);
3453
3454 #if 0 // some debugging code in case we see problems here
3455         /* if the refs count is one, it won't get increased again.  But
3456          * if the ref count is > 1, someone may be decreasing it at
3457          * the same time we are.
3458          */
3459         if (*refs != 1) {
3460                 struct extent_buffer *eb = NULL;
3461                 eb = btrfs_find_create_tree_block(root, start, len);
3462                 if (eb)
3463                         btrfs_tree_lock(eb);
3464
3465                 mutex_lock(&root->fs_info->alloc_mutex);
3466                 ret = lookup_extent_ref(NULL, root, start, len, refs);
3467                 BUG_ON(ret);
3468                 mutex_unlock(&root->fs_info->alloc_mutex);
3469
3470                 if (eb) {
3471                         btrfs_tree_unlock(eb);
3472                         free_extent_buffer(eb);
3473                 }
3474                 if (*refs == 1) {
3475                         printk("block %llu went down to one during drop_snap\n",
3476                                (unsigned long long)start);
3477                 }
3478
3479         }
3480 #endif
3481
3482         cond_resched();
3483         return ret;
3484 }
3485
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 *
 * Descends from path->nodes[*level] freeing everything this (dead) root
 * owns exclusively.  Shared blocks (refcount > 1) only get one reference
 * dropped and are not descended into.  On return, the block the walk
 * stopped at has been freed and *level points one level up.
 */
static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	u64 bytenr;
	u64 ptr_gen;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	struct extent_buffer *parent;
	struct btrfs_leaf_ref *ref;
	u32 blocksize;
	int ret;
	u32 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	/* if the starting block itself is shared there is nothing to walk */
	ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
				path->nodes[*level]->len, &refs);
	BUG_ON(ret);
	if (refs > 1)
		goto out;

	/*
	 * walk down to the last node level and free all the leaves
	 */
	while(*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		/* ran off the end of this node: pop back up via out */
		if (path->slots[*level] >=
		    btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			/* a leaf we own outright: free its file extents */
			ret = btrfs_drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);

		ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
		BUG_ON(ret);
		if (refs != 1) {
			/* child is shared: drop our single ref and move to
			 * the next slot without descending */
			parent = path->nodes[*level];
			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
			path->slots[*level]++;

			ret = __btrfs_free_extent(trans, root, bytenr,
						blocksize, parent->start,
						root_owner, root_gen,
						*level - 1, 1);
			BUG_ON(ret);

			atomic_inc(&root->fs_info->throttle_gen);
			wake_up(&root->fs_info->transaction_throttle);
			cond_resched();

			continue;
		}
		/*
		 * at this point, we have a single ref, and since the
		 * only place referencing this extent is a dead root
		 * the reference count should never go higher.
		 * So, we don't need to check it again
		 */
		if (*level == 1) {
			/* try the cached leaf ref to avoid reading the leaf;
			 * a stale generation means the cache entry is dead */
			ref = btrfs_lookup_leaf_ref(root, bytenr);
			if (ref && ref->generation != ptr_gen) {
				btrfs_free_leaf_ref(root, ref);
				ref = NULL;
			}
			if (ref) {
				ret = cache_drop_leaf_ref(trans, root, ref);
				BUG_ON(ret);
				btrfs_remove_leaf_ref(root, ref);
				btrfs_free_leaf_ref(root, ref);
				*level = 0;
				break;
			}
			if (printk_ratelimit()) {
				printk("leaf ref miss for bytenr %llu\n",
				       (unsigned long long)bytenr);
			}
		}
		next = btrfs_find_tree_block(root, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);

			next = read_tree_block(root, bytenr, blocksize,
					       ptr_gen);
			cond_resched();
#if 0
			/*
			 * this is a debugging check and can go away
			 * the ref should never go all the way down to 1
			 * at this point
			 */
			ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
						&refs);
			BUG_ON(ret);
			WARN_ON(refs != 1);
#endif
		}
		WARN_ON(*level <= 0);
		/* descend: install the child in the path and restart */
		if (path->nodes[*level-1])
			free_extent_buffer(path->nodes[*level-1]);
		path->nodes[*level-1] = next;
		*level = btrfs_header_level(next);
		path->slots[*level] = 0;
		cond_resched();
	}
out:
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	/* figure out who the parent of the stopped-at block is so the
	 * backref we drop matches the one that exists */
	if (path->nodes[*level] == root->node) {
		parent = path->nodes[*level];
		bytenr = path->nodes[*level]->start;
	} else {
		parent = path->nodes[*level + 1];
		bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
	}

	blocksize = btrfs_level_size(root, *level);
	root_owner = btrfs_header_owner(parent);
	root_gen = btrfs_header_generation(parent);

	/* free the block we stopped at and pop up one level */
	ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
				  parent->start, root_owner, root_gen,
				  *level, 1);
	free_extent_buffer(path->nodes[*level]);
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);

	cond_resched();
	return 0;
}
3636
/*
 * helper function for drop_subtree, this function is similar to
 * walk_down_tree. The main difference is that it checks reference
 * counts while tree blocks are locked.
 *
 * Blocks installed in the path here are locked (path->locks[*level] = 1);
 * on return the stopped-at block has been freed and unlocked, and *level
 * points one level up.
 */
static int noinline walk_down_subtree(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path, int *level)
{
	struct extent_buffer *next;
	struct extent_buffer *cur;
	struct extent_buffer *parent;
	u64 bytenr;
	u64 ptr_gen;
	u32 blocksize;
	u32 refs;
	int ret;

	/* a shared starting block: nothing below belongs solely to us */
	cur = path->nodes[*level];
	ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
				      &refs);
	BUG_ON(ret);
	if (refs > 1)
		goto out;

	while (*level >= 0) {
		cur = path->nodes[*level];
		if (*level == 0) {
			/* leaf we own: free its file extents and wipe it */
			ret = btrfs_drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			clean_tree_block(trans, root, cur);
			break;
		}
		if (path->slots[*level] >= btrfs_header_nritems(cur)) {
			/* finished this node: wipe it and pop up via out */
			clean_tree_block(trans, root, cur);
			break;
		}

		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);

		/* lock the child before checking its refcount */
		next = read_tree_block(root, bytenr, blocksize, ptr_gen);
		btrfs_tree_lock(next);

		ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
					      &refs);
		BUG_ON(ret);
		if (refs > 1) {
			/* shared child: drop our ref, skip the subtree */
			parent = path->nodes[*level];
			ret = btrfs_free_extent(trans, root, bytenr,
					blocksize, parent->start,
					btrfs_header_owner(parent),
					btrfs_header_generation(parent),
					*level - 1, 1);
			BUG_ON(ret);
			path->slots[*level]++;
			btrfs_tree_unlock(next);
			free_extent_buffer(next);
			continue;
		}

		/* descend into the exclusively owned child, keeping it locked */
		*level = btrfs_header_level(next);
		path->nodes[*level] = next;
		path->slots[*level] = 0;
		path->locks[*level] = 1;
		cond_resched();
	}
out:
	parent = path->nodes[*level + 1];
	bytenr = path->nodes[*level]->start;
	blocksize = path->nodes[*level]->len;

	/* free the block the walk stopped at */
	ret = btrfs_free_extent(trans, root, bytenr, blocksize,
			parent->start, btrfs_header_owner(parent),
			btrfs_header_generation(parent), *level, 1);
	BUG_ON(ret);

	if (path->locks[*level]) {
		btrfs_tree_unlock(path->nodes[*level]);
		path->locks[*level] = 0;
	}
	free_extent_buffer(path->nodes[*level]);
	path->nodes[*level] = NULL;
	*level += 1;
	cond_resched();
	return 0;
}
3725
3726 /*
3727  * helper for dropping snapshots.  This walks back up the tree in the path
3728  * to find the first node higher up where we haven't yet gone through
3729  * all the slots
3730  */
static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 int *level, int max_level)
{
	u64 root_owner;
	u64 root_gen;
	struct btrfs_root_item *root_item = &root->root_item;
	int i;
	int slot;
	int ret;

	for (i = *level; i < max_level && path->nodes[i]; i++) {
		slot = path->slots[i];
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			/*
			 * this node still has unvisited slots: advance to
			 * the next slot and record the resume point in the
			 * root item so an interrupted drop can restart here
			 */
			node = path->nodes[i];
			path->slots[i]++;
			*level = i;
			WARN_ON(*level == 0);
			btrfs_node_key(node, &disk_key, path->slots[i]);
			memcpy(&root_item->drop_progress,
			       &disk_key, sizeof(disk_key));
			root_item->drop_level = i;
			return 0;
		} else {
			struct extent_buffer *parent;
			/* the tree root has no parent node in the path */
			if (path->nodes[*level] == root->node)
				parent = path->nodes[*level];
			else
				parent = path->nodes[*level + 1];

			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);

			/* every slot was visited: free this block and go up */
			clean_tree_block(trans, root, path->nodes[*level]);
			ret = btrfs_free_extent(trans, root,
						path->nodes[*level]->start,
						path->nodes[*level]->len,
						parent->start, root_owner,
						root_gen, *level, 1);
			BUG_ON(ret);
			if (path->locks[*level]) {
				btrfs_tree_unlock(path->nodes[*level]);
				path->locks[*level] = 0;
			}
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			*level = i + 1;
		}
	}
	/* walked past max_level: everything below it has been processed */
	return 1;
}
3785
3786 /*
3787  * drop the reference count on the tree rooted at 'snap'.  This traverses
3788  * the tree freeing any blocks that have a ref count of zero after being
3789  * decremented.
3790  */
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
			*root)
{
	int ret = 0;
	int wret;
	int level;
	struct btrfs_path *path;
	int i;
	int orig_level;
	struct btrfs_root_item *root_item = &root->root_item;

	WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
	path = btrfs_alloc_path();
	BUG_ON(!path);

	level = btrfs_header_level(root->node);
	orig_level = level;
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* first pass over this snapshot: start at the tree root */
		path->nodes[level] = root->node;
		extent_buffer_get(root->node);
		path->slots[level] = 0;
	} else {
		struct btrfs_key key;
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;

		/*
		 * a previous pass was interrupted: resume from the key
		 * and level that walk_up_tree saved in the root item
		 */
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path->lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (wret < 0) {
			ret = wret;
			goto out;
		}
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
		/*
		 * unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot
		 */
		for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
			if (path->nodes[i] && path->locks[i]) {
				path->locks[i] = 0;
				btrfs_tree_unlock(path->nodes[i]);
			}
		}
	}
	/* alternate walking down and back up until the tree is consumed */
	while(1) {
		wret = walk_down_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;

		wret = walk_up_tree(trans, root, path, &level,
				    BTRFS_MAX_LEVEL);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;
		/* give up for now so the pending commit can make progress */
		if (trans->transaction->in_commit) {
			ret = -EAGAIN;
			break;
		}
		atomic_inc(&root->fs_info->throttle_gen);
		wake_up(&root->fs_info->transaction_throttle);
	}
	/* drop the extent buffer references still held by the path */
	for (i = 0; i <= orig_level; i++) {
		if (path->nodes[i]) {
			free_extent_buffer(path->nodes[i]);
			path->nodes[i] = NULL;
		}
	}
out:
	btrfs_free_path(path);
	return ret;
}
3870
3871 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3872                         struct btrfs_root *root,
3873                         struct extent_buffer *node,
3874                         struct extent_buffer *parent)
3875 {
3876         struct btrfs_path *path;
3877         int level;
3878         int parent_level;
3879         int ret = 0;
3880         int wret;
3881
3882         path = btrfs_alloc_path();
3883         BUG_ON(!path);
3884
3885         BUG_ON(!btrfs_tree_locked(parent));
3886         parent_level = btrfs_header_level(parent);
3887         extent_buffer_get(parent);
3888         path->nodes[parent_level] = parent;
3889         path->slots[parent_level] = btrfs_header_nritems(parent);
3890
3891         BUG_ON(!btrfs_tree_locked(node));
3892         level = btrfs_header_level(node);
3893         extent_buffer_get(node);
3894         path->nodes[level] = node;
3895         path->slots[level] = 0;
3896
3897         while (1) {
3898                 wret = walk_down_subtree(trans, root, path, &level);
3899                 if (wret < 0)
3900                         ret = wret;
3901                 if (wret != 0)
3902                         break;
3903
3904                 wret = walk_up_tree(trans, root, path, &level, parent_level);
3905                 if (wret < 0)
3906                         ret = wret;
3907                 if (wret != 0)
3908                         break;
3909         }
3910
3911         btrfs_free_path(path);
3912         return ret;
3913 }
3914
/*
 * compute the last page index to read ahead: a window of 'nr' pages
 * starting at 'start', clamped so it never goes past 'last'.
 */
static unsigned long calc_ra(unsigned long start, unsigned long last,
			     unsigned long nr)
{
	unsigned long end = start + nr - 1;

	return end < last ? end : last;
}
3920
3921 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
3922                                          u64 len)
3923 {
3924         u64 page_start;
3925         u64 page_end;
3926         unsigned long first_index;
3927         unsigned long last_index;
3928         unsigned long i;
3929         struct page *page;
3930         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3931         struct file_ra_state *ra;
3932         struct btrfs_ordered_extent *ordered;
3933         unsigned int total_read = 0;
3934         unsigned int total_dirty = 0;
3935         int ret = 0;
3936
3937         ra = kzalloc(sizeof(*ra), GFP_NOFS);
3938
3939         mutex_lock(&inode->i_mutex);
3940         first_index = start >> PAGE_CACHE_SHIFT;
3941         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3942
3943         /* make sure the dirty trick played by the caller work */
3944         ret = invalidate_inode_pages2_range(inode->i_mapping,
3945                                             first_index, last_index);
3946         if (ret)
3947                 goto out_unlock;
3948
3949         file_ra_state_init(ra, inode->i_mapping);
3950
3951         for (i = first_index ; i <= last_index; i++) {
3952                 if (total_read % ra->ra_pages == 0) {
3953                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3954                                        calc_ra(i, last_index, ra->ra_pages));
3955                 }
3956                 total_read++;
3957 again:
3958                 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
3959                         BUG_ON(1);
3960                 page = grab_cache_page(inode->i_mapping, i);
3961                 if (!page) {
3962                         ret = -ENOMEM;
3963                         goto out_unlock;
3964                 }
3965                 if (!PageUptodate(page)) {
3966                         btrfs_readpage(NULL, page);
3967                         lock_page(page);
3968                         if (!PageUptodate(page)) {
3969                                 unlock_page(page);
3970                                 page_cache_release(page);
3971                                 ret = -EIO;
3972                                 goto out_unlock;
3973                         }
3974                 }
3975                 wait_on_page_writeback(page);
3976
3977                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3978                 page_end = page_start + PAGE_CACHE_SIZE - 1;
3979                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3980
3981                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3982                 if (ordered) {
3983                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3984                         unlock_page(page);
3985                         page_cache_release(page);
3986                         btrfs_start_ordered_extent(inode, ordered, 1);
3987                         btrfs_put_ordered_extent(ordered);
3988                         goto again;
3989                 }
3990                 set_page_extent_mapped(page);
3991
3992                 btrfs_set_extent_delalloc(inode, page_start, page_end);
3993                 if (i == first_index)
3994                         set_extent_bits(io_tree, page_start, page_end,
3995                                         EXTENT_BOUNDARY, GFP_NOFS);
3996
3997                 set_page_dirty(page);
3998                 total_dirty++;
3999
4000                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4001                 unlock_page(page);
4002                 page_cache_release(page);
4003         }
4004
4005 out_unlock:
4006         kfree(ra);
4007         mutex_unlock(&inode->i_mutex);
4008         balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
4009         return ret;
4010 }
4011
4012 static int noinline relocate_data_extent(struct inode *reloc_inode,
4013                                          struct btrfs_key *extent_key,
4014                                          u64 offset)
4015 {
4016         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
4017         struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
4018         struct extent_map *em;
4019         u64 start = extent_key->objectid - offset;
4020         u64 end = start + extent_key->offset - 1;
4021
4022         em = alloc_extent_map(GFP_NOFS);
4023         BUG_ON(!em || IS_ERR(em));
4024
4025         em->start = start;
4026         em->len = extent_key->offset;
4027         em->block_len = extent_key->offset;
4028         em->block_start = extent_key->objectid;
4029         em->bdev = root->fs_info->fs_devices->latest_bdev;
4030         set_bit(EXTENT_FLAG_PINNED, &em->flags);
4031
4032         /* setup extent map to cheat btrfs_readpage */
4033         lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
4034         while (1) {
4035                 int ret;
4036                 spin_lock(&em_tree->lock);
4037                 ret = add_extent_mapping(em_tree, em);
4038                 spin_unlock(&em_tree->lock);
4039                 if (ret != -EEXIST) {
4040                         free_extent_map(em);
4041                         break;
4042                 }
4043                 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
4044         }
4045         unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
4046
4047         return relocate_inode_pages(reloc_inode, start, extent_key->offset);
4048 }
4049
/*
 * state for walking the back references of an extent up toward the
 * tree roots that reference it (see __next_ref_path)
 */
struct btrfs_ref_path {
	u64 extent_start;	/* bytenr of the extent being traced */
	/* bytenrs of the tree blocks along the current path, 0 == unset */
	u64 nodes[BTRFS_MAX_LEVEL];
	u64 root_objectid;	/* root found at the top of the walk */
	u64 root_generation;	/* generation taken from that root's ref */
	u64 owner_objectid;	/* ref objectid seen at the lowest level */
	u32 num_refs;		/* ref count seen at the lowest level */
	int lowest_level;	/* level the walk starts from (-1 == extent) */
	int current_level;	/* level the walk has reached */
	int shared_level;	/* lowest level reached while walking down */

	/*
	 * NOTE(review): node_keys/new_nodes are not touched by the
	 * ref-path walk itself; presumably filled in by the relocation
	 * code elsewhere — confirm against the callers.
	 */
	struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
	u64 new_nodes[BTRFS_MAX_LEVEL];
};
4064
/*
 * in-memory copy of one file extent, filled from the
 * btrfs_file_extent_* accessors in get_new_locations()
 */
struct disk_extent {
	u64 ram_bytes;		/* btrfs_file_extent_ram_bytes */
	u64 disk_bytenr;	/* btrfs_file_extent_disk_bytenr */
	u64 disk_num_bytes;	/* btrfs_file_extent_disk_num_bytes */
	u64 offset;		/* btrfs_file_extent_offset */
	u64 num_bytes;		/* btrfs_file_extent_num_bytes */
	u8 compression;		/* btrfs_file_extent_compression */
	u8 encryption;		/* btrfs_file_extent_encryption */
	u16 other_encoding;	/* btrfs_file_extent_other_encoding */
};
4075
4076 static int is_cowonly_root(u64 root_objectid)
4077 {
4078         if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
4079             root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
4080             root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
4081             root_objectid == BTRFS_DEV_TREE_OBJECTID ||
4082             root_objectid == BTRFS_TREE_LOG_OBJECTID)
4083                 return 1;
4084         return 0;
4085 }
4086
/*
 * advance 'ref_path' to the next back reference path of the extent at
 * ref_path->extent_start.
 *
 * On the first call the walk starts at the extent itself and follows
 * back references upward ("walk_up") until it hits a tree root or a
 * reference created by the running transaction.  Subsequent calls walk
 * back down to the deepest level with another unvisited reference and
 * climb up again from there.
 *
 * returns 0 when a path was found (root_objectid/root_generation are
 * filled in), 1 when all paths have been visited, < 0 on error.
 */
static int noinline __next_ref_path(struct btrfs_trans_handle *trans,
				    struct btrfs_root *extent_root,
				    struct btrfs_ref_path *ref_path,
				    int first_time)
{
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_extent_ref *ref;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 bytenr;
	u32 nritems;
	int level;
	int ret = 1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (first_time) {
		/* fresh walk: no levels visited yet */
		ref_path->lowest_level = -1;
		ref_path->current_level = -1;
		ref_path->shared_level = -1;
		goto walk_up;
	}
walk_down:
	/* descend looking for the deepest block with an unvisited ref */
	level = ref_path->current_level - 1;
	while (level >= -1) {
		u64 parent;
		if (level < ref_path->lowest_level)
			break;

		/* level -1 means the extent itself, not a tree block */
		if (level >= 0) {
			bytenr = ref_path->nodes[level];
		} else {
			bytenr = ref_path->extent_start;
		}
		BUG_ON(bytenr == 0);

		parent = ref_path->nodes[level + 1];
		ref_path->nodes[level + 1] = 0;
		ref_path->current_level = level;
		BUG_ON(parent == 0);

		/* search for the ref just after the one we came from */
		key.objectid = bytenr;
		key.offset = parent + 1;
		key.type = BTRFS_EXTENT_REF_KEY;

		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
		BUG_ON(ret == 0);

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				goto next;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid == bytenr &&
		    found_key.type == BTRFS_EXTENT_REF_KEY) {
			if (level < ref_path->shared_level)
				ref_path->shared_level = level;
			goto found;
		}
next:
		level--;
		btrfs_release_path(extent_root, path);
		cond_resched();
	}
	/* reached lowest level */
	ret = 1;
	goto out;
walk_up:
	/* climb from current_level toward a tree root */
	level = ref_path->current_level;
	while (level < BTRFS_MAX_LEVEL - 1) {
		u64 ref_objectid;
		if (level >= 0) {
			bytenr = ref_path->nodes[level];
		} else {
			bytenr = ref_path->extent_start;
		}
		BUG_ON(bytenr == 0);

		/* find the first back reference of this block */
		key.objectid = bytenr;
		key.offset = 0;
		key.type = BTRFS_EXTENT_REF_KEY;

		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				/* the extent was freed by someone */
				if (ref_path->lowest_level == level)
					goto out;
				btrfs_release_path(extent_root, path);
				goto walk_down;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != bytenr ||
				found_key.type != BTRFS_EXTENT_REF_KEY) {
			/* the extent was freed by someone */
			if (ref_path->lowest_level == level) {
				ret = 1;
				goto out;
			}
			btrfs_release_path(extent_root, path);
			goto walk_down;
		}
found:
		ref = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_extent_ref);
		ref_objectid = btrfs_ref_objectid(leaf, ref);
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
			/*
			 * tree block ref: the objectid encodes the level
			 * the referencing block lives at
			 */
			if (first_time) {
				level = (int)ref_objectid;
				BUG_ON(level >= BTRFS_MAX_LEVEL);
				ref_path->lowest_level = level;
				ref_path->current_level = level;
				ref_path->nodes[level] = bytenr;
			} else {
				WARN_ON(ref_objectid != level);
			}
		} else {
			WARN_ON(level != -1);
		}
		first_time = 0;

		if (ref_path->lowest_level == level) {
			ref_path->owner_objectid = ref_objectid;
			ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
		}

		/*
		 * the block is tree root or the block isn't in reference
		 * counted tree.
		 */
		if (found_key.objectid == found_key.offset ||
		    is_cowonly_root(btrfs_ref_root(leaf, ref))) {
			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
			ref_path->root_generation =
				btrfs_ref_generation(leaf, ref);
			if (level < 0) {
				/* special reference from the tree log */
				ref_path->nodes[0] = found_key.offset;
				ref_path->current_level = 0;
			}
			ret = 0;
			goto out;
		}

		/* record the parent block and keep climbing */
		level++;
		BUG_ON(ref_path->nodes[level] != 0);
		ref_path->nodes[level] = found_key.offset;
		ref_path->current_level = level;

		/*
		 * the reference was created in the running transaction,
		 * no need to continue walking up.
		 */
		if (btrfs_ref_generation(leaf, ref) == trans->transid) {
			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
			ref_path->root_generation =
				btrfs_ref_generation(leaf, ref);
			ret = 0;
			goto out;
		}

		btrfs_release_path(extent_root, path);
		cond_resched();
	}
	/* reached max tree level, but no tree root found. */
	BUG();
out:
	btrfs_free_path(path);
	return ret;
}
4280
4281 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
4282                                 struct btrfs_root *extent_root,
4283                                 struct btrfs_ref_path *ref_path,
4284                                 u64 extent_start)
4285 {
4286         memset(ref_path, 0, sizeof(*ref_path));
4287         ref_path->extent_start = extent_start;
4288
4289         return __next_ref_path(trans, extent_root, ref_path, 1);
4290 }
4291
/*
 * continue a back reference walk previously started by
 * btrfs_first_ref_path, using the state saved in 'ref_path'
 */
static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root,
			       struct btrfs_ref_path *ref_path)
{
	return __next_ref_path(trans, extent_root, ref_path, 0);
}
4298
4299 static int noinline get_new_locations(struct inode *reloc_inode,
4300                                       struct btrfs_key *extent_key,
4301                                       u64 offset, int no_fragment,
4302                                       struct disk_extent **extents,
4303                                       int *nr_extents)
4304 {
4305         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
4306         struct btrfs_path *path;
4307         struct btrfs_file_extent_item *fi;
4308         struct extent_buffer *leaf;
4309         struct disk_extent *exts = *extents;
4310         struct btrfs_key found_key;
4311         u64 cur_pos;
4312         u64 last_byte;
4313         u32 nritems;
4314         int nr = 0;
4315         int max = *nr_extents;
4316         int ret;
4317
4318         WARN_ON(!no_fragment && *extents);
4319         if (!exts) {
4320                 max = 1;
4321                 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
4322                 if (!exts)
4323                         return -ENOMEM;
4324         }
4325
4326         path = btrfs_alloc_path();
4327         BUG_ON(!path);
4328
4329         cur_pos = extent_key->objectid - offset;
4330         last_byte = extent_key->objectid + extent_key->offset;
4331         ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
4332                                        cur_pos, 0);
4333         if (ret < 0)
4334                 goto out;
4335         if (ret > 0) {
4336                 ret = -ENOENT;
4337                 goto out;
4338         }
4339
4340         while (1) {
4341                 leaf = path->nodes[0];
4342                 nritems = btrfs_header_nritems(leaf);
4343                 if (path->slots[0] >= nritems) {
4344                         ret = btrfs_next_leaf(root, path);
4345                         if (ret < 0)
4346                                 goto out;
4347                         if (ret > 0)
4348                                 break;
4349                         leaf = path->nodes[0];
4350                 }
4351
4352                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4353                 if (found_key.offset != cur_pos ||
4354                     found_key.type != BTRFS_EXTENT_DATA_KEY ||
4355                     found_key.objectid != reloc_inode->i_ino)
4356                         break;
4357
4358                 fi = btrfs_item_ptr(leaf, path->slots[0],
4359                                     struct btrfs_file_extent_item);
4360                 if (btrfs_file_extent_type(leaf, fi) !=
4361                     BTRFS_FILE_EXTENT_REG ||
4362                     btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4363                         break;
4364
4365                 if (nr == max) {
4366                         struct disk_extent *old = exts;
4367                         max *= 2;
4368                         exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
4369                         memcpy(exts, old, sizeof(*exts) * nr);
4370                         if (old != *extents)
4371                                 kfree(old);
4372                 }
4373
4374                 exts[nr].disk_bytenr =
4375                         btrfs_file_extent_disk_bytenr(leaf, fi);
4376                 exts[nr].disk_num_bytes =
4377                         btrfs_file_extent_disk_num_bytes(leaf, fi);
4378                 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
4379                 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4380                 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
4381                 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
4382                 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
4383                 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
4384                                                                            fi);
4385                 BUG_ON(exts[nr].offset > 0);
4386                 BUG_ON(exts[nr].compression || exts[nr].encryption);
4387                 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
4388
4389                 cur_pos += exts[nr].num_bytes;
4390                 nr++;
4391
4392                 if (cur_pos + offset >= last_byte)
4393                         break;
4394
4395                 if (no_fragment) {
4396                         ret = 1;
4397                         goto out;
4398                 }
4399                 path->slots[0]++;
4400         }
4401
4402         WARN_ON(cur_pos + offset > last_byte);
4403         if (cur_pos + offset < last_byte) {
4404                 ret = -ENOENT;
4405                 goto out;
4406         }
4407         ret = 0;
4408 out:
4409         btrfs_free_path(path);
4410         if (ret) {
4411                 if (exts != *extents)
4412                         kfree(exts);
4413         } else {
4414                 *extents = exts;
4415                 *nr_extents = nr;
4416         }
4417         return ret;
4418 }
4419
/*
 * Find every file extent item in @root that references the extent
 * described by @extent_key (its disk bytenr equals extent_key->objectid)
 * and repoint it at the relocated copy described by @new_extents.
 *
 * For each matching BTRFS_EXTENT_DATA_KEY item: the owning inode is
 * looked up, the covered file range is locked in the io tree (waiting
 * out any overlapping ordered extent first), the extent pointer is
 * rewritten in place, a reference is taken on the new extent and the
 * reference on the old extent is dropped.
 *
 * @leaf_key gives the key to start searching from.  Unless
 * ref_path->owner_objectid is BTRFS_MULTIPLE_OBJECTIDS, the scan is
 * bounded to the single owning objectid.  Only nr_extents == 1 is
 * supported; the multi-extent path is compiled out below (see #if 0).
 *
 * Returns 0 on success or a negative errno from the tree search.
 */
static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct btrfs_key *extent_key,
					struct btrfs_key *leaf_key,
					struct btrfs_ref_path *ref_path,
					struct disk_extent *new_extents,
					int nr_extents)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct inode *inode = NULL;
	struct btrfs_key key;
	u64 lock_start = 0;
	u64 lock_end = 0;
	u64 num_bytes;
	u64 ext_offset;
	u64 first_pos;
	u32 nritems;
	int nr_scaned = 0;
	int extent_locked = 0;
	int extent_type;
	int ret;

	memcpy(&key, leaf_key, sizeof(key));
	/*
	 * first_pos is refined below (key.offset - ext_offset of the first
	 * match) and, together with extent_key->offset, bounds how far past
	 * the extent the scan may run before terminating.
	 */
	first_pos = INT_LIMIT(loff_t) - extent_key->offset;
	if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
		/* clamp the start key so we begin at the owner's file data */
		if (key.objectid < ref_path->owner_objectid ||
		    (key.objectid == ref_path->owner_objectid &&
		     key.type < BTRFS_EXTENT_DATA_KEY)) {
			key.objectid = ref_path->owner_objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = 0;
		}
	}

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
next:
		if (extent_locked && ret > 0) {
			/*
			 * the file extent item was modified by someone
			 * before the extent got locked.
			 */
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
			extent_locked = 0;
		}

		if (path->slots[0] >= nritems) {
			/* give up after walking off the end of two leaves */
			if (++nr_scaned > 2)
				break;

			BUG_ON(extent_locked);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
			/* past the owning inode's data items: done */
			if ((key.objectid > ref_path->owner_objectid) ||
			    (key.objectid == ref_path->owner_objectid &&
			     key.type > BTRFS_EXTENT_DATA_KEY) ||
			    (key.offset >= first_pos + extent_key->offset))
				break;
		}

		if (inode && key.objectid != inode->i_ino) {
			/* crossed into another inode; drop the old one */
			BUG_ON(extent_locked);
			btrfs_release_path(root, path);
			mutex_unlock(&inode->i_mutex);
			iput(inode);
			inode = NULL;
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			ret = 1;
			goto next;
		}
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);
		/* only regular/prealloc extents pointing at our extent match */
		if ((extent_type != BTRFS_FILE_EXTENT_REG &&
		     extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
		    (btrfs_file_extent_disk_bytenr(leaf, fi) !=
		     extent_key->objectid)) {
			path->slots[0]++;
			ret = 1;
			goto next;
		}

		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
		ext_offset = btrfs_file_extent_offset(leaf, fi);

		if (first_pos > key.offset - ext_offset)
			first_pos = key.offset - ext_offset;

		if (!extent_locked) {
			lock_start = key.offset;
			lock_end = lock_start + num_bytes - 1;
		} else {
			/* item grew/moved outside the locked range: relock */
			if (lock_start > key.offset ||
			    lock_end + 1 < key.offset + num_bytes) {
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      lock_start, lock_end, GFP_NOFS);
				extent_locked = 0;
			}
		}

		if (!inode) {
			btrfs_release_path(root, path);

			inode = btrfs_iget_locked(root->fs_info->sb,
						  key.objectid, root);
			if (inode->i_state & I_NEW) {
				BTRFS_I(inode)->root = root;
				BTRFS_I(inode)->location.objectid =
					key.objectid;
				BTRFS_I(inode)->location.type =
					BTRFS_INODE_ITEM_KEY;
				BTRFS_I(inode)->location.offset = 0;
				btrfs_read_locked_inode(inode);
				unlock_new_inode(inode);
			}
			/*
			 * some code paths call btrfs_commit_transaction
			 * while holding the i_mutex, so we can't use
			 * mutex_lock here.
			 */
			if (is_bad_inode(inode) ||
			    !mutex_trylock(&inode->i_mutex)) {
				iput(inode);
				inode = NULL;
				/* forces the loop-exit test after skip: */
				key.offset = (u64)-1;
				goto skip;
			}
		}

		if (!extent_locked) {
			struct btrfs_ordered_extent *ordered;

			btrfs_release_path(root, path);

			lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				    lock_end, GFP_NOFS);
			ordered = btrfs_lookup_first_ordered_extent(inode,
								    lock_end);
			if (ordered &&
			    ordered->file_offset <= lock_end &&
			    ordered->file_offset + ordered->len > lock_start) {
				/* wait for in-flight IO, then rescan range */
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      lock_start, lock_end, GFP_NOFS);
				btrfs_start_ordered_extent(inode, ordered, 1);
				btrfs_put_ordered_extent(ordered);
				key.offset += num_bytes;
				goto skip;
			}
			if (ordered)
				btrfs_put_ordered_extent(ordered);

			extent_locked = 1;
			/* re-search so we revalidate the item under the lock */
			continue;
		}

		if (nr_extents == 1) {
			/* update extent pointer in place */
			btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extents[0].disk_bytenr);
			btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extents[0].disk_num_bytes);
			btrfs_mark_buffer_dirty(leaf);

			btrfs_drop_extent_cache(inode, key.offset,
						key.offset + num_bytes - 1, 0);

			ret = btrfs_inc_extent_ref(trans, root,
						new_extents[0].disk_bytenr,
						new_extents[0].disk_num_bytes,
						leaf->start,
						root->root_key.objectid,
						trans->transid,
						key.objectid);
			BUG_ON(ret);

			ret = btrfs_free_extent(trans, root,
						extent_key->objectid,
						extent_key->offset,
						leaf->start,
						btrfs_header_owner(leaf),
						btrfs_header_generation(leaf),
						key.objectid, 0);
			BUG_ON(ret);

			btrfs_release_path(root, path);
			key.offset += num_bytes;
		} else {
			BUG_ON(1);
#if 0
			u64 alloc_hint;
			u64 extent_len;
			int i;
			/*
			 * drop old extent pointer at first, then insert the
			 * new pointers one by one
			 */
			btrfs_release_path(root, path);
			ret = btrfs_drop_extents(trans, root, inode, key.offset,
						 key.offset + num_bytes,
						 key.offset, &alloc_hint);
			BUG_ON(ret);

			for (i = 0; i < nr_extents; i++) {
				if (ext_offset >= new_extents[i].num_bytes) {
					ext_offset -= new_extents[i].num_bytes;
					continue;
				}
				extent_len = min(new_extents[i].num_bytes -
						 ext_offset, num_bytes);

				ret = btrfs_insert_empty_item(trans, root,
							      path, &key,
							      sizeof(*fi));
				BUG_ON(ret);

				leaf = path->nodes[0];
				fi = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
				btrfs_set_file_extent_generation(leaf, fi,
							trans->transid);
				btrfs_set_file_extent_type(leaf, fi,
							BTRFS_FILE_EXTENT_REG);
				btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extents[i].disk_bytenr);
				btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extents[i].disk_num_bytes);
				btrfs_set_file_extent_ram_bytes(leaf, fi,
						new_extents[i].ram_bytes);

				btrfs_set_file_extent_compression(leaf, fi,
						new_extents[i].compression);
				btrfs_set_file_extent_encryption(leaf, fi,
						new_extents[i].encryption);
				btrfs_set_file_extent_other_encoding(leaf, fi,
						new_extents[i].other_encoding);

				btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_len);
				ext_offset += new_extents[i].offset;
				btrfs_set_file_extent_offset(leaf, fi,
							ext_offset);
				btrfs_mark_buffer_dirty(leaf);

				btrfs_drop_extent_cache(inode, key.offset,
						key.offset + extent_len - 1, 0);

				ret = btrfs_inc_extent_ref(trans, root,
						new_extents[i].disk_bytenr,
						new_extents[i].disk_num_bytes,
						leaf->start,
						root->root_key.objectid,
						trans->transid, key.objectid);
				BUG_ON(ret);
				btrfs_release_path(root, path);

				inode_add_bytes(inode, extent_len);

				ext_offset = 0;
				num_bytes -= extent_len;
				key.offset += extent_len;

				if (num_bytes == 0)
					break;
			}
			BUG_ON(i >= nr_extents);
#endif
		}

		if (extent_locked) {
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
			extent_locked = 0;
		}
skip:
		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
		    key.offset >= first_pos + extent_key->offset)
			break;

		cond_resched();
	}
	ret = 0;
out:
	btrfs_release_path(root, path);
	if (inode) {
		mutex_unlock(&inode->i_mutex);
		if (extent_locked) {
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
		}
		iput(inode);
	}
	return ret;
}
4735
4736 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
4737                                struct btrfs_root *root,
4738                                struct extent_buffer *buf, u64 orig_start)
4739 {
4740         int level;
4741         int ret;
4742
4743         BUG_ON(btrfs_header_generation(buf) != trans->transid);
4744         BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
4745
4746         level = btrfs_header_level(buf);
4747         if (level == 0) {
4748                 struct btrfs_leaf_ref *ref;
4749                 struct btrfs_leaf_ref *orig_ref;
4750
4751                 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
4752                 if (!orig_ref)
4753                         return -ENOENT;
4754
4755                 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
4756                 if (!ref) {
4757                         btrfs_free_leaf_ref(root, orig_ref);
4758                         return -ENOMEM;
4759                 }
4760
4761                 ref->nritems = orig_ref->nritems;
4762                 memcpy(ref->extents, orig_ref->extents,
4763                         sizeof(ref->extents[0]) * ref->nritems);
4764
4765                 btrfs_free_leaf_ref(root, orig_ref);
4766
4767                 ref->root_gen = trans->transid;
4768                 ref->bytenr = buf->start;
4769                 ref->owner = btrfs_header_owner(buf);
4770                 ref->generation = btrfs_header_generation(buf);
4771                 ret = btrfs_add_leaf_ref(root, ref, 0);
4772                 WARN_ON(ret);
4773                 btrfs_free_leaf_ref(root, ref);
4774         }
4775         return 0;
4776 }
4777
4778 static int noinline invalidate_extent_cache(struct btrfs_root *root,
4779                                         struct extent_buffer *leaf,
4780                                         struct btrfs_block_group_cache *group,
4781                                         struct btrfs_root *target_root)
4782 {
4783         struct btrfs_key key;
4784         struct inode *inode = NULL;
4785         struct btrfs_file_extent_item *fi;
4786         u64 num_bytes;
4787         u64 skip_objectid = 0;
4788         u32 nritems;
4789         u32 i;
4790
4791         nritems = btrfs_header_nritems(leaf);
4792         for (i = 0; i < nritems; i++) {
4793                 btrfs_item_key_to_cpu(leaf, &key, i);
4794                 if (key.objectid == skip_objectid ||
4795                     key.type != BTRFS_EXTENT_DATA_KEY)
4796                         continue;
4797                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4798                 if (btrfs_file_extent_type(leaf, fi) ==
4799                     BTRFS_FILE_EXTENT_INLINE)
4800                         continue;
4801                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4802                         continue;
4803                 if (!inode || inode->i_ino != key.objectid) {
4804                         iput(inode);
4805                         inode = btrfs_ilookup(target_root->fs_info->sb,
4806                                               key.objectid, target_root, 1);
4807                 }
4808                 if (!inode) {
4809                         skip_objectid = key.objectid;
4810                         continue;
4811                 }
4812                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4813
4814                 lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4815                             key.offset + num_bytes - 1, GFP_NOFS);
4816                 btrfs_drop_extent_cache(inode, key.offset,
4817                                         key.offset + num_bytes - 1, 1);
4818                 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4819                               key.offset + num_bytes - 1, GFP_NOFS);
4820                 cond_resched();
4821         }
4822         iput(inode);
4823         return 0;
4824 }
4825
/*
 * Rewrite every file extent item in @leaf that lies inside block group
 * @group to point at its relocated copy, as resolved through
 * get_new_locations() against @reloc_inode.  The cached leaf ref for
 * @leaf is updated in step with the items so the ref cache stays
 * consistent, and extent references are moved from the old location to
 * the new one.
 *
 * ext_index tracks the position in ref->extents: it is advanced for
 * every non-hole, non-inline extent item (even ones outside the block
 * group) so it stays aligned with the leaf ref's records.
 *
 * BUGs on allocation failure, missing leaf ref, or get_new_locations
 * error; returns 0 otherwise.
 */
static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct extent_buffer *leaf,
					struct btrfs_block_group_cache *group,
					struct inode *reloc_inode)
{
	struct btrfs_key key;
	struct btrfs_key extent_key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_leaf_ref *ref;
	struct disk_extent *new_extent;
	u64 bytenr;
	u64 num_bytes;
	u32 nritems;
	u32 i;
	int ext_index;
	int nr_extent;
	int ret;

	new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
	BUG_ON(!new_extent);

	ref = btrfs_lookup_leaf_ref(root, leaf->start);
	BUG_ON(!ref);

	ext_index = -1;
	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		/* bytenr == 0 is a hole */
		if (bytenr == 0)
			continue;

		/* keep ext_index in lockstep with ref->extents[] */
		ext_index++;
		if (bytenr >= group->key.objectid + group->key.offset ||
		    bytenr + num_bytes <= group->key.objectid)
			continue;

		extent_key.objectid = bytenr;
		extent_key.offset = num_bytes;
		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
		nr_extent = 1;
		/* at most one new location fits; > 0 means not relocated */
		ret = get_new_locations(reloc_inode, &extent_key,
					group->key.objectid, 1,
					&new_extent, &nr_extent);
		if (ret > 0)
			continue;
		BUG_ON(ret < 0);

		/* the cached ref must match what the leaf said */
		BUG_ON(ref->extents[ext_index].bytenr != bytenr);
		BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
		ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;

		btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extent->disk_bytenr);
		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extent->disk_num_bytes);
		btrfs_mark_buffer_dirty(leaf);

		/* take a ref on the new extent, drop the old one */
		ret = btrfs_inc_extent_ref(trans, root,
					new_extent->disk_bytenr,
					new_extent->disk_num_bytes,
					leaf->start,
					root->root_key.objectid,
					trans->transid, key.objectid);
		BUG_ON(ret);
		ret = btrfs_free_extent(trans, root,
					bytenr, num_bytes, leaf->start,
					btrfs_header_owner(leaf),
					btrfs_header_generation(leaf),
					key.objectid, 0);
		BUG_ON(ret);
		cond_resched();
	}
	kfree(new_extent);
	/* every ref-cache record must have been visited exactly once */
	BUG_ON(ext_index + 1 != ref->nritems);
	btrfs_free_leaf_ref(root, ref);
	return 0;
}
4913
4914 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
4915                           struct btrfs_root *root)
4916 {
4917         struct btrfs_root *reloc_root;
4918         int ret;
4919
4920         if (root->reloc_root) {
4921                 reloc_root = root->reloc_root;
4922                 root->reloc_root = NULL;
4923                 list_add(&reloc_root->dead_list,
4924                          &root->fs_info->dead_reloc_roots);
4925
4926                 btrfs_set_root_bytenr(&reloc_root->root_item,
4927                                       reloc_root->node->start);
4928                 btrfs_set_root_level(&root->root_item,
4929                                      btrfs_header_level(reloc_root->node));
4930                 memset(&reloc_root->root_item.drop_progress, 0,
4931                         sizeof(struct btrfs_disk_key));
4932                 reloc_root->root_item.drop_level = 0;
4933
4934                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4935                                         &reloc_root->root_key,
4936                                         &reloc_root->root_item);
4937                 BUG_ON(ret);
4938         }
4939         return 0;
4940 }
4941
/*
 * Delete every reloc tree queued on fs_info->dead_reloc_roots.
 *
 * Each root is dropped incrementally: btrfs_drop_snapshot() returns
 * -EAGAIN to let us end the transaction, flush dirty metadata and start
 * over, so no single transaction grows unbounded.  Note the control
 * flow: when drop_snapshot finally returns something other than -EAGAIN
 * we break out of the inner loop with drop_mutex still held and the
 * transaction still open — both are used (del_root) and released after
 * the loop.
 *
 * Freeing of each root struct is deferred by one iteration (prev_root)
 * before kfree; the last one also has its leaf refs removed.
 * Always returns 0 (failures BUG).
 */
int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *reloc_root;
	struct btrfs_root *prev_root = NULL;
	struct list_head dead_roots;
	int ret;
	unsigned long nr;

	INIT_LIST_HEAD(&dead_roots);
	/* take the whole list in one shot; we own these roots now */
	list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);

	while (!list_empty(&dead_roots)) {
		reloc_root = list_entry(dead_roots.prev,
					struct btrfs_root, dead_list);
		list_del_init(&reloc_root->dead_list);

		BUG_ON(reloc_root->commit_root != NULL);
		while (1) {
			trans = btrfs_join_transaction(root, 1);
			BUG_ON(!trans);

			mutex_lock(&root->fs_info->drop_mutex);
			ret = btrfs_drop_snapshot(trans, reloc_root);
			/* done (or error): leave with mutex + trans held */
			if (ret != -EAGAIN)
				break;
			mutex_unlock(&root->fs_info->drop_mutex);

			/* chunked drop: commit progress and go again */
			nr = trans->blocks_used;
			ret = btrfs_end_transaction(trans, root);
			BUG_ON(ret);
			btrfs_btree_balance_dirty(root, nr);
		}

		free_extent_buffer(reloc_root->node);

		ret = btrfs_del_root(trans, root->fs_info->tree_root,
				     &reloc_root->root_key);
		BUG_ON(ret);
		mutex_unlock(&root->fs_info->drop_mutex);

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, root);
		BUG_ON(ret);
		btrfs_btree_balance_dirty(root, nr);

		/* free the previous root struct; keep this one one round */
		kfree(prev_root);
		prev_root = reloc_root;
	}
	if (prev_root) {
		btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
		kfree(prev_root);
	}
	return 0;
}
4997
/*
 * Queue @root on fs_info->dead_reloc_roots so it is deleted by a later
 * btrfs_drop_dead_reloc_roots() call.  Always returns 0.
 *
 * NOTE(review): the list is modified without an explicit lock here —
 * callers appear to serialize access (e.g. via tree_reloc_mutex);
 * confirm before adding new call sites.
 */
int btrfs_add_dead_reloc_root(struct btrfs_root *root)
{
	list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
	return 0;
}
5003
5004 int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
5005 {
5006         struct btrfs_root *reloc_root;
5007         struct btrfs_trans_handle *trans;
5008         struct btrfs_key location;
5009         int found;
5010         int ret;
5011
5012         mutex_lock(&root->fs_info->tree_reloc_mutex);
5013         ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
5014         BUG_ON(ret);
5015         found = !list_empty(&root->fs_info->dead_reloc_roots);
5016         mutex_unlock(&root->fs_info->tree_reloc_mutex);
5017
5018         if (found) {
5019                 trans = btrfs_start_transaction(root, 1);
5020                 BUG_ON(!trans);
5021                 ret = btrfs_commit_transaction(trans, root);
5022                 BUG_ON(ret);
5023         }
5024
5025         location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
5026         location.offset = (u64)-1;
5027         location.type = BTRFS_ROOT_ITEM_KEY;
5028
5029         reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
5030         BUG_ON(!reloc_root);
5031         btrfs_orphan_cleanup(reloc_root);
5032         return 0;
5033 }
5034
5035 static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
5036                                     struct btrfs_root *root)
5037 {
5038         struct btrfs_root *reloc_root;
5039         struct extent_buffer *eb;
5040         struct btrfs_root_item *root_item;
5041         struct btrfs_key root_key;
5042         int ret;
5043
5044         BUG_ON(!root->ref_cows);
5045         if (root->reloc_root)
5046                 return 0;
5047
5048         root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
5049         BUG_ON(!root_item);
5050
5051         ret = btrfs_copy_root(trans, root, root->commit_root,
5052                               &eb, BTRFS_TREE_RELOC_OBJECTID);
5053         BUG_ON(ret);
5054
5055         root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
5056         root_key.offset = root->root_key.objectid;
5057         root_key.type = BTRFS_ROOT_ITEM_KEY;
5058
5059         memcpy(root_item, &root->root_item, sizeof(root_item));
5060         btrfs_set_root_refs(root_item, 0);
5061         btrfs_set_root_bytenr(root_item, eb->start);
5062         btrfs_set_root_level(root_item, btrfs_header_level(eb));
5063         btrfs_set_root_generation(root_item, trans->transid);
5064
5065         btrfs_tree_unlock(eb);
5066         free_extent_buffer(eb);
5067
5068         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
5069                                 &root_key, root_item);
5070         BUG_ON(ret);
5071         kfree(root_item);
5072
5073         reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
5074                                                  &root_key);
5075         BUG_ON(!reloc_root);
5076         reloc_root->last_trans = trans->transid;
5077         reloc_root->commit_root = NULL;
5078         reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
5079
5080         root->reloc_root = reloc_root;
5081         return 0;
5082 }
5083
/*
 * Core function of space balance.
 *
 * The idea is to use reloc trees to relocate tree blocks in reference
 * counted roots. There is one reloc tree for each subvol, and all
 * reloc trees share the same root key objectid. Reloc trees are snapshots
 * of the latest committed roots of subvols (root->commit_root).
 *
 * To relocate a tree block referenced by a subvol, there are two steps:
 * COW the block through the subvol's reloc tree, then update the block
 * pointer in the subvol to point to the new block. Since all reloc trees
 * share the same root key objectid, special handling of tree blocks
 * owned by them is easy. Once a tree block has been COWed in one reloc
 * tree, we can use the resulting new block directly when the same block
 * is required to COW again through other reloc trees. In this way,
 * relocated tree blocks are shared between reloc trees, so they are
 * also shared between subvols.
 */
/*
 * Relocate the tree blocks along one reference path (see the strategy
 * comment above).  Failures in the middle are treated as fatal (BUG).
 */
static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      struct btrfs_key *first_key,
				      struct btrfs_ref_path *ref_path,
				      struct btrfs_block_group_cache *group,
				      struct inode *reloc_inode)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb = NULL;
	struct btrfs_key *keys;
	u64 *nodes;
	int level;
	int shared_level;
	int lowest_level = 0;
	int ret;

	/* for tree blocks, owner_objectid encodes the block's level */
	if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		lowest_level = ref_path->owner_objectid;

	if (!root->ref_cows) {
		/*
		 * not a reference counted tree: COWing the path in place
		 * (search with cow=1) is all that is needed.
		 */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
		BUG_ON(ret < 0);
		path->lowest_level = 0;
		btrfs_release_path(root, path);
		return 0;
	}

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = init_reloc_tree(trans, root);
	BUG_ON(ret);
	reloc_root = root->reloc_root;

	shared_level = ref_path->shared_level;
	ref_path->shared_level = BTRFS_MAX_LEVEL - 1;

	keys = ref_path->node_keys;
	nodes = ref_path->new_nodes;
	/* forget cached keys/blocks above the deepest shared level */
	memset(&keys[shared_level + 1], 0,
	       sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
	memset(&nodes[shared_level + 1], 0,
	       sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));

	if (nodes[lowest_level] == 0) {
		/*
		 * no relocated copy of the lowest block yet: COW the path
		 * through the reloc tree and record the resulting blocks
		 * and their first keys for later reuse.
		 */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 1);
		BUG_ON(ret);
		for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
			eb = path->nodes[level];
			if (!eb || eb == reloc_root->node)
				break;
			nodes[level] = eb->start;
			if (level == 0)
				btrfs_item_key_to_cpu(eb, &keys[level], 0);
			else
				btrfs_node_key_to_cpu(eb, &keys[level], 0);
		}
		if (nodes[0] &&
		    ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/* rewrite data extent pointers in the new leaf */
			eb = path->nodes[0];
			ret = replace_extents_in_leaf(trans, reloc_root, eb,
						      group, reloc_inode);
			BUG_ON(ret);
		}
		btrfs_release_path(reloc_root, path);
	} else {
		/* reuse blocks already relocated through another path */
		ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
				       lowest_level);
		BUG_ON(ret);
	}

	/*
	 * replace tree blocks in the fs tree with tree blocks in
	 * the reloc tree.
	 */
	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
	BUG_ON(ret < 0);

	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
		/* invalidate extent mappings cached for the replaced leaf */
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 0);
		BUG_ON(ret);
		extent_buffer_get(path->nodes[0]);
		eb = path->nodes[0];
		btrfs_release_path(reloc_root, path);
		ret = invalidate_extent_cache(reloc_root, eb, group, root);
		BUG_ON(ret);
		free_extent_buffer(eb);
	}

	mutex_unlock(&root->fs_info->tree_reloc_mutex);
	path->lowest_level = 0;
	return 0;
}
5198
5199 static int noinline relocate_tree_block(struct btrfs_trans_handle *trans,
5200                                         struct btrfs_root *root,
5201                                         struct btrfs_path *path,
5202                                         struct btrfs_key *first_key,
5203                                         struct btrfs_ref_path *ref_path)
5204 {
5205         int ret;
5206
5207         ret = relocate_one_path(trans, root, path, first_key,
5208                                 ref_path, NULL, NULL);
5209         BUG_ON(ret);
5210
5211         if (root == root->fs_info->extent_root)
5212                 btrfs_extent_post_op(trans, root);
5213
5214         return 0;
5215 }
5216
5217 static int noinline del_extent_zero(struct btrfs_trans_handle *trans,
5218                                     struct btrfs_root *extent_root,
5219                                     struct btrfs_path *path,
5220                                     struct btrfs_key *extent_key)
5221 {
5222         int ret;
5223
5224         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
5225         if (ret)
5226                 goto out;
5227         ret = btrfs_del_item(trans, extent_root, path);
5228 out:
5229         btrfs_release_path(extent_root, path);
5230         return ret;
5231 }
5232
5233 static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info,
5234                                                 struct btrfs_ref_path *ref_path)
5235 {
5236         struct btrfs_key root_key;
5237
5238         root_key.objectid = ref_path->root_objectid;
5239         root_key.type = BTRFS_ROOT_ITEM_KEY;
5240         if (is_cowonly_root(ref_path->root_objectid))
5241                 root_key.offset = 0;
5242         else
5243                 root_key.offset = (u64)-1;
5244
5245         return btrfs_read_fs_root_no_name(fs_info, &root_key);
5246 }
5247
5248 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
5249                                         struct btrfs_path *path,
5250                                         struct btrfs_key *extent_key,
5251                                         struct btrfs_block_group_cache *group,
5252                                         struct inode *reloc_inode, int pass)
5253 {
5254         struct btrfs_trans_handle *trans;
5255         struct btrfs_root *found_root;
5256         struct btrfs_ref_path *ref_path = NULL;
5257         struct disk_extent *new_extents = NULL;
5258         int nr_extents = 0;
5259         int loops;
5260         int ret;
5261         int level;
5262         struct btrfs_key first_key;
5263         u64 prev_block = 0;
5264
5265
5266         trans = btrfs_start_transaction(extent_root, 1);
5267         BUG_ON(!trans);
5268
5269         if (extent_key->objectid == 0) {
5270                 ret = del_extent_zero(trans, extent_root, path, extent_key);
5271                 goto out;
5272         }
5273
5274         ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
5275         if (!ref_path) {
5276                ret = -ENOMEM;
5277                goto out;
5278         }
5279
5280         for (loops = 0; ; loops++) {
5281                 if (loops == 0) {
5282                         ret = btrfs_first_ref_path(trans, extent_root, ref_path,
5283                                                    extent_key->objectid);
5284                 } else {
5285                         ret = btrfs_next_ref_path(trans, extent_root, ref_path);
5286                 }
5287                 if (ret < 0)
5288                         goto out;
5289                 if (ret > 0)
5290                         break;
5291
5292                 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
5293                     ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
5294                         continue;
5295
5296                 found_root = read_ref_root(extent_root->fs_info, ref_path);
5297                 BUG_ON(!found_root);
5298                 /*
5299                  * for reference counted tree, only process reference paths
5300                  * rooted at the latest committed root.
5301                  */
5302                 if (found_root->ref_cows &&
5303                     ref_path->root_generation != found_root->root_key.offset)
5304                         continue;
5305
5306                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5307                         if (pass == 0) {
5308                                 /*
5309                                  * copy data extents to new locations
5310                                  */
5311                                 u64 group_start = group->key.objectid;
5312                                 ret = relocate_data_extent(reloc_inode,
5313                                                            extent_key,
5314                                                            group_start);
5315                                 if (ret < 0)
5316                                         goto out;
5317                                 break;
5318                         }
5319                         level = 0;
5320                 } else {
5321                         level = ref_path->owner_objectid;
5322                 }
5323
5324                 if (prev_block != ref_path->nodes[level]) {
5325                         struct extent_buffer *eb;
5326                         u64 block_start = ref_path->nodes[level];
5327                         u64 block_size = btrfs_level_size(found_root, level);
5328
5329                         eb = read_tree_block(found_root, block_start,
5330                                              block_size, 0);
5331                         btrfs_tree_lock(eb);
5332                         BUG_ON(level != btrfs_header_level(eb));
5333
5334                         if (level == 0)
5335                                 btrfs_item_key_to_cpu(eb, &first_key, 0);
5336                         else
5337                                 btrfs_node_key_to_cpu(eb, &first_key, 0);
5338
5339                         btrfs_tree_unlock(eb);
5340                         free_extent_buffer(eb);
5341                         prev_block = block_start;
5342                 }
5343
5344                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
5345                     pass >= 2) {
5346                         /*
5347                          * use fallback method to process the remaining
5348                          * references.
5349                          */
5350                         if (!new_extents) {
5351                                 u64 group_start = group->key.objectid;
5352                                 new_extents = kmalloc(sizeof(*new_extents),
5353                                                       GFP_NOFS);
5354                                 nr_extents = 1;
5355                                 ret = get_new_locations(reloc_inode,
5356                                                         extent_key,
5357                                                         group_start, 1,
5358                                                         &new_extents,
5359                                                         &nr_extents);
5360                                 if (ret)
5361                                         goto out;
5362                         }
5363                         btrfs_record_root_in_trans(found_root);
5364                         ret = replace_one_extent(trans, found_root,
5365                                                 path, extent_key,
5366                                                 &first_key, ref_path,
5367                                                 new_extents, nr_extents);
5368                         if (ret < 0)
5369                                 goto out;
5370                         continue;
5371                 }
5372
5373                 btrfs_record_root_in_trans(found_root);
5374                 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
5375                         ret = relocate_tree_block(trans, found_root, path,
5376                                                   &first_key, ref_path);
5377                 } else {
5378                         /*
5379                          * try to update data extent references while
5380                          * keeping metadata shared between snapshots.
5381                          */
5382                         ret = relocate_one_path(trans, found_root, path,
5383                                                 &first_key, ref_path,
5384                                                 group, reloc_inode);
5385                 }
5386                 if (ret < 0)
5387                         goto out;
5388         }
5389         ret = 0;
5390 out:
5391         btrfs_end_transaction(trans, extent_root);
5392         kfree(new_extents);
5393         kfree(ref_path);
5394         return ret;
5395 }
5396
5397 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
5398 {
5399         u64 num_devices;
5400         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
5401                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
5402
5403         num_devices = root->fs_info->fs_devices->rw_devices;
5404         if (num_devices == 1) {
5405                 stripped |= BTRFS_BLOCK_GROUP_DUP;
5406                 stripped = flags & ~stripped;
5407
5408                 /* turn raid0 into single device chunks */
5409                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
5410                         return stripped;
5411
5412                 /* turn mirroring into duplication */
5413                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
5414                              BTRFS_BLOCK_GROUP_RAID10))
5415                         return stripped | BTRFS_BLOCK_GROUP_DUP;
5416                 return flags;
5417         } else {
5418                 /* they already had raid on here, just return */
5419                 if (flags & stripped)
5420                         return flags;
5421
5422                 stripped |= BTRFS_BLOCK_GROUP_DUP;
5423                 stripped = flags & ~stripped;
5424
5425                 /* switch duplicated blocks with raid1 */
5426                 if (flags & BTRFS_BLOCK_GROUP_DUP)
5427                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
5428
5429                 /* turn single device chunks into raid0 */
5430                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
5431         }
5432         return flags;
5433 }
5434
5435 int __alloc_chunk_for_shrink(struct btrfs_root *root,
5436                      struct btrfs_block_group_cache *shrink_block_group,
5437                      int force)
5438 {
5439         struct btrfs_trans_handle *trans;
5440         u64 new_alloc_flags;
5441         u64 calc;
5442
5443         spin_lock(&shrink_block_group->lock);
5444         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
5445                 spin_unlock(&shrink_block_group->lock);
5446
5447                 trans = btrfs_start_transaction(root, 1);
5448                 spin_lock(&shrink_block_group->lock);
5449
5450                 new_alloc_flags = update_block_group_flags(root,
5451                                                    shrink_block_group->flags);
5452                 if (new_alloc_flags != shrink_block_group->flags) {
5453                         calc =
5454                              btrfs_block_group_used(&shrink_block_group->item);
5455                 } else {
5456                         calc = shrink_block_group->key.offset;
5457                 }
5458                 spin_unlock(&shrink_block_group->lock);
5459
5460                 do_chunk_alloc(trans, root->fs_info->extent_root,
5461                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
5462
5463                 btrfs_end_transaction(trans, root);
5464         } else
5465                 spin_unlock(&shrink_block_group->lock);
5466         return 0;
5467 }
5468
5469 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
5470                                  struct btrfs_root *root,
5471                                  u64 objectid, u64 size)
5472 {
5473         struct btrfs_path *path;
5474         struct btrfs_inode_item *item;
5475         struct extent_buffer *leaf;
5476         int ret;
5477
5478         path = btrfs_alloc_path();
5479         if (!path)
5480                 return -ENOMEM;
5481
5482         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
5483         if (ret)
5484                 goto out;
5485
5486         leaf = path->nodes[0];
5487         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
5488         memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
5489         btrfs_set_inode_generation(leaf, item, 1);
5490         btrfs_set_inode_size(leaf, item, size);
5491         btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
5492         btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM |
5493                                           BTRFS_INODE_NOCOMPRESS);
5494         btrfs_mark_buffer_dirty(leaf);
5495         btrfs_release_path(root, path);
5496 out:
5497         btrfs_free_path(path);
5498         return ret;
5499 }
5500
/*
 * Create an inode in the data reloc tree covering the whole block
 * group.  Relocated data extents are copied into this inode; it is put
 * on the orphan list so it gets cleaned up after relocation.
 * Returns the inode or an ERR_PTR.
 */
static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
					struct btrfs_block_group_cache *group)
{
	struct inode *inode = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_key root_key;
	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
	int err = 0;

	root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(root))
		return ERR_CAST(root);

	trans = btrfs_start_transaction(root, 1);
	BUG_ON(!trans);

	err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
	if (err)
		goto out;

	/* inode item sized to span the entire block group */
	err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
	BUG_ON(err);

	/* one preallocated-style file extent covering the whole file */
	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
				       group->key.offset, 0, group->key.offset,
				       0, 0, 0);
	BUG_ON(err);

	/*
	 * NOTE(review): btrfs_iget_locked result is not checked for NULL
	 * before the i_state dereference — confirm it cannot fail here.
	 */
	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
	if (inode->i_state & I_NEW) {
		BTRFS_I(inode)->root = root;
		BTRFS_I(inode)->location.objectid = objectid;
		BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
		BTRFS_I(inode)->location.offset = 0;
		btrfs_read_locked_inode(inode);
		unlock_new_inode(inode);
		BUG_ON(is_bad_inode(inode));
	} else {
		/* the objectid was just allocated, so it must be new */
		BUG_ON(1);
	}

	err = btrfs_orphan_add(trans, inode);
out:
	btrfs_end_transaction(trans, root);
	if (err) {
		if (inode)
			iput(inode);
		inode = ERR_PTR(err);
	}
	return inode;
}
5556
/*
 * Move every extent out of a block group so its chunk can be removed.
 * Works in passes: pass 0 copies data extents into the reloc inode,
 * later passes update references; extents that could not be handled
 * are counted in 'skipped' and retried (falling back to pass >= 2).
 */
int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_buffer *leaf;
	struct inode *reloc_inode;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_key key;
	u64 skipped;
	u64 cur_byte;
	u64 total_found;
	u32 nritems;
	int ret;
	int progress;
	int pass = 0;

	/* all searching below happens in the extent tree */
	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(info, group_start);
	BUG_ON(!block_group);

	printk("btrfs relocating block group %llu flags %llu\n",
	       (unsigned long long)block_group->key.objectid,
	       (unsigned long long)block_group->flags);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	reloc_inode = create_reloc_inode(info, block_group);
	BUG_ON(IS_ERR(reloc_inode));

	/* make sure there is somewhere to relocate into, then freeze it */
	__alloc_chunk_for_shrink(root, block_group, 1);
	set_block_group_readonly(block_group);

	btrfs_start_delalloc_inodes(info->tree_root);
	btrfs_wait_ordered_extents(info->tree_root, 0);
again:
	skipped = 0;
	total_found = 0;
	progress = 0;
	key.objectid = block_group->key.objectid;
	key.offset = 0;
	key.type = 0;
	cur_byte = key.objectid;

	/* commit so the extent tree reflects everything done so far */
	trans = btrfs_start_transaction(info->tree_root, 1);
	btrfs_commit_transaction(trans, info->tree_root);

	mutex_lock(&root->fs_info->cleaner_mutex);
	btrfs_clean_old_snapshots(info->tree_root);
	btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
	mutex_unlock(&root->fs_info->cleaner_mutex);

	/* walk all extent items inside the block group's range */
	while(1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
next:
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		/* past the end of the block group, this pass is done */
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (progress && need_resched()) {
			btrfs_release_path(root, path);
			cond_resched();
			progress = 0;
			continue;
		}
		progress = 1;

		/* skip non-extent items and extents already processed */
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
		    key.objectid + key.offset <= cur_byte) {
			path->slots[0]++;
			goto next;
		}

		total_found++;
		cur_byte = key.objectid + key.offset;
		btrfs_release_path(root, path);

		__alloc_chunk_for_shrink(root, block_group, 0);
		ret = relocate_one_extent(root, path, &key, block_group,
					  reloc_inode, pass);
		BUG_ON(ret < 0);
		if (ret > 0)
			skipped++;

		/* resume the search just past this extent */
		key.objectid = cur_byte;
		key.type = 0;
		key.offset = 0;
	}

	btrfs_release_path(root, path);

	if (pass == 0) {
		/* flush the data copied into the reloc inode to disk */
		btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
		invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
		WARN_ON(reloc_inode->i_mapping->nrpages);
	}

	if (total_found > 0) {
		printk("btrfs found %llu extents in pass %d\n",
		       (unsigned long long)total_found, pass);
		pass++;
		if (total_found == skipped && pass > 2) {
			/* stuck: start over with a fresh reloc inode */
			iput(reloc_inode);
			reloc_inode = create_reloc_inode(info, block_group);
			pass = 0;
		}
		goto again;
	}

	/* delete reloc_inode */
	iput(reloc_inode);

	/* unpin extents in this range */
	trans = btrfs_start_transaction(info->tree_root, 1);
	btrfs_commit_transaction(trans, info->tree_root);

	spin_lock(&block_group->lock);
	WARN_ON(block_group->pinned > 0);
	WARN_ON(block_group->reserved > 0);
	WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
	spin_unlock(&block_group->lock);
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
5703
5704 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
5705                            struct btrfs_key *key)
5706 {
5707         int ret = 0;
5708         struct btrfs_key found_key;
5709         struct extent_buffer *leaf;
5710         int slot;
5711
5712         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
5713         if (ret < 0)
5714                 goto out;
5715
5716         while(1) {
5717                 slot = path->slots[0];
5718                 leaf = path->nodes[0];
5719                 if (slot >= btrfs_header_nritems(leaf)) {
5720                         ret = btrfs_next_leaf(root, path);
5721                         if (ret == 0)
5722                                 continue;
5723                         if (ret < 0)
5724                                 goto out;
5725                         break;
5726                 }
5727                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5728
5729                 if (found_key.objectid >= key->objectid &&
5730                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5731                         ret = 0;
5732                         goto out;
5733                 }
5734                 path->slots[0]++;
5735         }
5736         ret = -ENOENT;
5737 out:
5738         return ret;
5739 }
5740
5741 int btrfs_free_block_groups(struct btrfs_fs_info *info)
5742 {
5743         struct btrfs_block_group_cache *block_group;
5744         struct rb_node *n;
5745
5746         spin_lock(&info->block_group_cache_lock);
5747         while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
5748                 block_group = rb_entry(n, struct btrfs_block_group_cache,
5749                                        cache_node);
5750                 rb_erase(&block_group->cache_node,
5751                          &info->block_group_cache_tree);
5752                 spin_unlock(&info->block_group_cache_lock);
5753
5754                 btrfs_remove_free_space_cache(block_group);
5755                 down_write(&block_group->space_info->groups_sem);
5756                 list_del(&block_group->list);
5757                 up_write(&block_group->space_info->groups_sem);
5758                 kfree(block_group);
5759
5760                 spin_lock(&info->block_group_cache_lock);
5761         }
5762         spin_unlock(&info->block_group_cache_lock);
5763         return 0;
5764 }
5765
5766 int btrfs_read_block_groups(struct btrfs_root *root)
5767 {
5768         struct btrfs_path *path;
5769         int ret;
5770         struct btrfs_block_group_cache *cache;
5771         struct btrfs_fs_info *info = root->fs_info;
5772         struct btrfs_space_info *space_info;
5773         struct btrfs_key key;
5774         struct btrfs_key found_key;
5775         struct extent_buffer *leaf;
5776
5777         root = info->extent_root;
5778         key.objectid = 0;
5779         key.offset = 0;
5780         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5781         path = btrfs_alloc_path();
5782         if (!path)
5783                 return -ENOMEM;
5784
5785         while(1) {
5786                 ret = find_first_block_group(root, path, &key);
5787                 if (ret > 0) {
5788                         ret = 0;
5789                         goto error;
5790                 }
5791                 if (ret != 0)
5792                         goto error;
5793
5794                 leaf = path->nodes[0];
5795                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5796                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
5797                 if (!cache) {
5798                         ret = -ENOMEM;
5799                         break;
5800                 }
5801
5802                 spin_lock_init(&cache->lock);
5803                 mutex_init(&cache->alloc_mutex);
5804                 INIT_LIST_HEAD(&cache->list);
5805                 read_extent_buffer(leaf, &cache->item,
5806                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
5807                                    sizeof(cache->item));
5808                 memcpy(&cache->key, &found_key, sizeof(found_key));
5809
5810                 key.objectid = found_key.objectid + found_key.offset;
5811                 btrfs_release_path(root, path);
5812                 cache->flags = btrfs_block_group_flags(&cache->item);
5813
5814                 ret = update_space_info(info, cache->flags, found_key.offset,
5815                                         btrfs_block_group_used(&cache->item),
5816                                         &space_info);
5817                 BUG_ON(ret);
5818                 cache->space_info = space_info;
5819                 down_write(&space_info->groups_sem);
5820                 list_add_tail(&cache->list, &space_info->block_groups);
5821                 up_write(&space_info->groups_sem);
5822
5823                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
5824                 BUG_ON(ret);
5825
5826                 set_avail_alloc_bits(root->fs_info, cache->flags);
5827                 if (btrfs_chunk_readonly(root, cache->key.objectid))
5828                         set_block_group_readonly(cache);
5829         }
5830         ret = 0;
5831 error:
5832         btrfs_free_path(path);
5833         return ret;
5834 }
5835
5836 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
5837                            struct btrfs_root *root, u64 bytes_used,
5838                            u64 type, u64 chunk_objectid, u64 chunk_offset,
5839                            u64 size)
5840 {
5841         int ret;
5842         struct btrfs_root *extent_root;
5843         struct btrfs_block_group_cache *cache;
5844
5845         extent_root = root->fs_info->extent_root;
5846
5847         root->fs_info->last_trans_new_blockgroup = trans->transid;
5848
5849         cache = kzalloc(sizeof(*cache), GFP_NOFS);
5850         if (!cache)
5851                 return -ENOMEM;
5852
5853         cache->key.objectid = chunk_offset;
5854         cache->key.offset = size;
5855         spin_lock_init(&cache->lock);
5856         mutex_init(&cache->alloc_mutex);
5857         INIT_LIST_HEAD(&cache->list);
5858         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5859
5860         btrfs_set_block_group_used(&cache->item, bytes_used);
5861         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
5862         cache->flags = type;
5863         btrfs_set_block_group_flags(&cache->item, type);
5864
5865         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
5866                                 &cache->space_info);
5867         BUG_ON(ret);
5868         down_write(&cache->space_info->groups_sem);
5869         list_add_tail(&cache->list, &cache->space_info->block_groups);
5870         up_write(&cache->space_info->groups_sem);
5871
5872         ret = btrfs_add_block_group_cache(root->fs_info, cache);
5873         BUG_ON(ret);
5874
5875         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
5876                                 sizeof(cache->item));
5877         BUG_ON(ret);
5878
5879         finish_current_insert(trans, extent_root, 0);
5880         ret = del_pending_extents(trans, extent_root, 0);
5881         BUG_ON(ret);
5882         set_avail_alloc_bits(extent_root->fs_info, type);
5883
5884         return 0;
5885 }
5886
/*
 * Remove an emptied, read-only block group: unhook it from the caches
 * and space accounting, then delete its item from the extent tree.
 */
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start)
{
	struct btrfs_path *path;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_key key;
	int ret;

	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
	BUG_ON(!block_group);
	/* only read-only (fully relocated) groups may be removed */
	BUG_ON(!block_group->ro);

	memcpy(&key, &block_group->key, sizeof(key));

	path = btrfs_alloc_path();
	BUG_ON(!path);

	btrfs_remove_free_space_cache(block_group);
	rb_erase(&block_group->cache_node,
		 &root->fs_info->block_group_cache_tree);
	down_write(&block_group->space_info->groups_sem);
	list_del(&block_group->list);
	up_write(&block_group->space_info->groups_sem);

	spin_lock(&block_group->space_info->lock);
	block_group->space_info->total_bytes -= block_group->key.offset;
	block_group->space_info->bytes_readonly -= block_group->key.offset;
	spin_unlock(&block_group->space_info->lock);
	/*
	 * NOTE(review): full is cleared after the spinlock is dropped —
	 * confirm concurrent readers of space_info->full are unharmed.
	 */
	block_group->space_info->full = 0;

	/*
	 * NOTE(review): the block_group struct itself is not freed here;
	 * the commented-out kfree below suggests this is unresolved.
	 *
	memset(shrink_block_group, 0, sizeof(*shrink_block_group));
	kfree(shrink_block_group);
	*/

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	/* the item must exist; a "not found" result is corruption */
	if (ret > 0)
		ret = -EIO;
	if (ret < 0)
		goto out;

	ret = btrfs_del_item(trans, root, path);
out:
	btrfs_free_path(path);
	return ret;
}