Btrfs: use a btree inode instead of sb_getblk
[safe/jmp/linux-2.6] / fs / btrfs / extent-tree.c
1 #include <linux/module.h>
2 #include "ctree.h"
3 #include "disk-io.h"
4 #include "print-tree.h"
5 #include "transaction.h"
6
7 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
8                             *orig_root, u64 num_blocks, u64 search_start, u64
9                             search_end, struct btrfs_key *ins);
10 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
11                                  btrfs_root *extent_root);
12 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
13                                btrfs_root *extent_root);
14
15 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
16                          *root, u64 blocknr, u64 num_blocks)
17 {
18         struct btrfs_path path;
19         int ret;
20         struct btrfs_key key;
21         struct btrfs_leaf *l;
22         struct btrfs_extent_item *item;
23         struct btrfs_key ins;
24         u32 refs;
25
26         find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1,
27                          &ins);
28         btrfs_init_path(&path);
29         key.objectid = blocknr;
30         key.flags = 0;
31         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
32         key.offset = num_blocks;
33         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path,
34                                 0, 1);
35         if (ret != 0)
36                 BUG();
37         BUG_ON(ret != 0);
38         l = btrfs_buffer_leaf(path.nodes[0]);
39         item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
40         refs = btrfs_extent_refs(item);
41         btrfs_set_extent_refs(item, refs + 1);
42         mark_buffer_dirty(path.nodes[0]);
43
44         btrfs_release_path(root->fs_info->extent_root, &path);
45         finish_current_insert(trans, root->fs_info->extent_root);
46         del_pending_extents(trans, root->fs_info->extent_root);
47         return 0;
48 }
49
50 static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
51                             *root, u64 blocknr, u64 num_blocks, u32 *refs)
52 {
53         struct btrfs_path path;
54         int ret;
55         struct btrfs_key key;
56         struct btrfs_leaf *l;
57         struct btrfs_extent_item *item;
58         btrfs_init_path(&path);
59         key.objectid = blocknr;
60         key.offset = num_blocks;
61         key.flags = 0;
62         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
63         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path,
64                                 0, 0);
65         if (ret != 0)
66                 BUG();
67         l = btrfs_buffer_leaf(path.nodes[0]);
68         item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
69         *refs = btrfs_extent_refs(item);
70         btrfs_release_path(root->fs_info->extent_root, &path);
71         return 0;
72 }
73
74 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
75                   struct buffer_head *buf)
76 {
77         u64 blocknr;
78         struct btrfs_node *buf_node;
79         struct btrfs_leaf *buf_leaf;
80         struct btrfs_disk_key *key;
81         struct btrfs_file_extent_item *fi;
82         int i;
83         int leaf;
84         int ret;
85
86         if (!root->ref_cows)
87                 return 0;
88         buf_node = btrfs_buffer_node(buf);
89         leaf = btrfs_is_leaf(buf_node);
90         buf_leaf = btrfs_buffer_leaf(buf);
91         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
92                 if (leaf) {
93                         key = &buf_leaf->items[i].key;
94                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
95                                 continue;
96                         fi = btrfs_item_ptr(buf_leaf, i,
97                                             struct btrfs_file_extent_item);
98                         ret = inc_block_ref(trans, root,
99                                     btrfs_file_extent_disk_blocknr(fi),
100                                     btrfs_file_extent_disk_num_blocks(fi));
101                         BUG_ON(ret);
102                 } else {
103                         blocknr = btrfs_node_blockptr(buf_node, i);
104                         ret = inc_block_ref(trans, root, blocknr, 1);
105                         BUG_ON(ret);
106                 }
107         }
108         return 0;
109 }
110
111 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
112                                btrfs_root *root)
113 {
114         unsigned long gang[8];
115         u64 first = 0;
116         int ret;
117         int i;
118         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
119
120         while(1) {
121                 ret = find_first_radix_bit(pinned_radix, gang,
122                                            ARRAY_SIZE(gang));
123                 if (!ret)
124                         break;
125                 if (!first)
126                         first = gang[0];
127                 for (i = 0; i < ret; i++) {
128                         clear_radix_bit(pinned_radix, gang[i]);
129                 }
130         }
131         if (root->fs_info->last_insert.objectid > first)
132                 root->fs_info->last_insert.objectid = first;
133         root->fs_info->last_insert.offset = 0;
134         return 0;
135 }
136
137 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
138                                  btrfs_root *extent_root)
139 {
140         struct btrfs_key ins;
141         struct btrfs_extent_item extent_item;
142         int i;
143         int ret;
144         u64 super_blocks_used;
145         struct btrfs_fs_info *info = extent_root->fs_info;
146
147         btrfs_set_extent_refs(&extent_item, 1);
148         btrfs_set_extent_owner(&extent_item,
149                 btrfs_header_parentid(btrfs_buffer_header(extent_root->node)));
150         ins.offset = 1;
151         ins.flags = 0;
152         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
153
154         for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) {
155                 ins.objectid = extent_root->fs_info->current_insert.objectid +
156                                 i;
157                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
158                 btrfs_set_super_blocks_used(info->disk_super,
159                                             super_blocks_used + 1);
160                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
161                                         sizeof(extent_item));
162                 BUG_ON(ret);
163         }
164         extent_root->fs_info->current_insert.offset = 0;
165         return 0;
166 }
167
168 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
169 {
170         int err;
171         struct btrfs_header *header;
172         struct buffer_head *bh;
173
174         if (!pending) {
175                 bh = btrfs_find_tree_block(root, blocknr);
176                 if (bh && buffer_uptodate(bh)) {
177                         header = btrfs_buffer_header(bh);
178                         if (btrfs_header_generation(header) ==
179                             root->fs_info->running_transaction->transid) {
180                                 brelse(bh);
181                                 return 0;
182                         }
183                         brelse(bh);
184                 }
185                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
186         } else {
187                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
188         }
189         BUG_ON(err);
190         return 0;
191 }
192
193 /*
194  * remove an extent from the root, returns 0 on success
195  */
196 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
197                          *root, u64 blocknr, u64 num_blocks, int pin)
198 {
199         struct btrfs_path path;
200         struct btrfs_key key;
201         struct btrfs_fs_info *info = root->fs_info;
202         struct btrfs_root *extent_root = info->extent_root;
203         int ret;
204         struct btrfs_extent_item *ei;
205         struct btrfs_key ins;
206         u32 refs;
207
208         key.objectid = blocknr;
209         key.flags = 0;
210         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
211         key.offset = num_blocks;
212
213         find_free_extent(trans, root, 0, 0, (u64)-1, &ins);
214         btrfs_init_path(&path);
215         ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1);
216         if (ret) {
217                 printk("failed to find %Lu\n", key.objectid);
218                 btrfs_print_tree(extent_root, extent_root->node);
219                 printk("failed to find %Lu\n", key.objectid);
220                 BUG();
221         }
222         ei = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
223                             struct btrfs_extent_item);
224         BUG_ON(ei->refs == 0);
225         refs = btrfs_extent_refs(ei) - 1;
226         btrfs_set_extent_refs(ei, refs);
227         mark_buffer_dirty(path.nodes[0]);
228         if (refs == 0) {
229                 u64 super_blocks_used;
230
231                 if (pin) {
232                         ret = pin_down_block(root, blocknr, 0);
233                         BUG_ON(ret);
234                 }
235
236                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
237                 btrfs_set_super_blocks_used(info->disk_super,
238                                             super_blocks_used - num_blocks);
239                 ret = btrfs_del_item(trans, extent_root, &path);
240                 if (extent_root->fs_info->last_insert.objectid > blocknr)
241                         extent_root->fs_info->last_insert.objectid = blocknr;
242                 if (ret)
243                         BUG();
244         }
245         btrfs_release_path(extent_root, &path);
246         finish_current_insert(trans, extent_root);
247         return ret;
248 }
249
250 /*
251  * find all the blocks marked as pending in the radix tree and remove
252  * them from the extent map
253  */
254 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
255                                btrfs_root *extent_root)
256 {
257         int ret;
258         int wret;
259         int err = 0;
260         unsigned long gang[4];
261         int i;
262         struct radix_tree_root *pending_radix;
263         struct radix_tree_root *pinned_radix;
264
265         pending_radix = &extent_root->fs_info->pending_del_radix;
266         pinned_radix = &extent_root->fs_info->pinned_radix;
267
268         while(1) {
269                 ret = find_first_radix_bit(pending_radix, gang,
270                                            ARRAY_SIZE(gang));
271                 if (!ret)
272                         break;
273                 for (i = 0; i < ret; i++) {
274                         wret = set_radix_bit(pinned_radix, gang[i]);
275                         BUG_ON(wret);
276                         wret = clear_radix_bit(pending_radix, gang[i]);
277                         BUG_ON(wret);
278                         wret = __free_extent(trans, extent_root,
279                                              gang[i], 1, 0);
280                         if (wret)
281                                 err = wret;
282                 }
283         }
284         return err;
285 }
286
287 /*
288  * remove an extent from the root, returns 0 on success
289  */
290 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
291                       *root, u64 blocknr, u64 num_blocks, int pin)
292 {
293         struct btrfs_root *extent_root = root->fs_info->extent_root;
294         int pending_ret;
295         int ret;
296
297         if (root == extent_root) {
298                 pin_down_block(root, blocknr, 1);
299                 return 0;
300         }
301         ret = __free_extent(trans, root, blocknr, num_blocks, pin);
302         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
303         return ret ? ret : pending_ret;
304 }
305
306 /*
307  * walks the btree of allocated extents and find a hole of a given size.
308  * The key ins is changed to record the hole:
309  * ins->objectid == block start
310  * ins->flags = BTRFS_EXTENT_ITEM_KEY
311  * ins->offset == number of blocks
312  * Any available blocks before search_start are skipped.
313  */
314 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
315                             *orig_root, u64 num_blocks, u64 search_start, u64
316                             search_end, struct btrfs_key *ins)
317 {
318         struct btrfs_path path;
319         struct btrfs_key key;
320         int ret;
321         u64 hole_size = 0;
322         int slot = 0;
323         u64 last_block = 0;
324         u64 test_block;
325         int start_found;
326         struct btrfs_leaf *l;
327         struct btrfs_root * root = orig_root->fs_info->extent_root;
328         int total_needed = num_blocks;
329         int level;
330
331         level = btrfs_header_level(btrfs_buffer_header(root->node));
332         total_needed += (level + 1) * 3;
333         if (root->fs_info->last_insert.objectid > search_start)
334                 search_start = root->fs_info->last_insert.objectid;
335
336         ins->flags = 0;
337         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
338
339 check_failed:
340         btrfs_init_path(&path);
341         ins->objectid = search_start;
342         ins->offset = 0;
343         start_found = 0;
344         ret = btrfs_search_slot(trans, root, ins, &path, 0, 0);
345         if (ret < 0)
346                 goto error;
347
348         if (path.slots[0] > 0)
349                 path.slots[0]--;
350
351         while (1) {
352                 l = btrfs_buffer_leaf(path.nodes[0]);
353                 slot = path.slots[0];
354                 if (slot >= btrfs_header_nritems(&l->header)) {
355                         ret = btrfs_next_leaf(root, &path);
356                         if (ret == 0)
357                                 continue;
358                         if (ret < 0)
359                                 goto error;
360                         if (!start_found) {
361                                 ins->objectid = search_start;
362                                 ins->offset = (u64)-1;
363                                 start_found = 1;
364                                 goto check_pending;
365                         }
366                         ins->objectid = last_block > search_start ?
367                                         last_block : search_start;
368                         ins->offset = (u64)-1;
369                         goto check_pending;
370                 }
371                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
372                 if (key.objectid >= search_start) {
373                         if (start_found) {
374                                 if (last_block < search_start)
375                                         last_block = search_start;
376                                 hole_size = key.objectid - last_block;
377                                 if (hole_size > total_needed) {
378                                         ins->objectid = last_block;
379                                         ins->offset = hole_size;
380                                         goto check_pending;
381                                 }
382                         }
383                 }
384                 start_found = 1;
385                 last_block = key.objectid + key.offset;
386                 path.slots[0]++;
387         }
388         // FIXME -ENOSPC
389 check_pending:
390         /* we have to make sure we didn't find an extent that has already
391          * been allocated by the map tree or the original allocation
392          */
393         btrfs_release_path(root, &path);
394         BUG_ON(ins->objectid < search_start);
395         for (test_block = ins->objectid;
396              test_block < ins->objectid + total_needed; test_block++) {
397                 if (test_radix_bit(&root->fs_info->pinned_radix,
398                                       test_block)) {
399                         search_start = test_block + 1;
400                         goto check_failed;
401                 }
402         }
403         BUG_ON(root->fs_info->current_insert.offset);
404         root->fs_info->current_insert.offset = total_needed - num_blocks;
405         root->fs_info->current_insert.objectid = ins->objectid + num_blocks;
406         root->fs_info->current_insert.flags = 0;
407         root->fs_info->last_insert.objectid = ins->objectid;
408         ins->offset = num_blocks;
409         return 0;
410 error:
411         btrfs_release_path(root, &path);
412         return ret;
413 }
414
415 /*
416  * finds a free extent and does all the dirty work required for allocation
417  * returns the key for the extent through ins, and a tree buffer for
418  * the first block of the extent through buf.
419  *
420  * returns 0 if everything worked, non-zero otherwise.
421  */
422 int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root
423                         *root, u64 num_blocks, u64 search_start, u64
424                         search_end, u64 owner, struct btrfs_key *ins)
425 {
426         int ret;
427         int pending_ret;
428         u64 super_blocks_used;
429         struct btrfs_fs_info *info = root->fs_info;
430         struct btrfs_root *extent_root = info->extent_root;
431         struct btrfs_extent_item extent_item;
432
433         btrfs_set_extent_refs(&extent_item, 1);
434         btrfs_set_extent_owner(&extent_item, owner);
435
436         if (root == extent_root) {
437                 BUG_ON(extent_root->fs_info->current_insert.offset == 0);
438                 BUG_ON(num_blocks != 1);
439                 BUG_ON(extent_root->fs_info->current_insert.flags ==
440                        extent_root->fs_info->current_insert.offset);
441                 ins->offset = 1;
442                 ins->objectid = extent_root->fs_info->current_insert.objectid +
443                                 extent_root->fs_info->current_insert.flags++;
444                 return 0;
445         }
446         ret = find_free_extent(trans, root, num_blocks, search_start,
447                                search_end, ins);
448         if (ret)
449                 return ret;
450
451         super_blocks_used = btrfs_super_blocks_used(info->disk_super);
452         btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
453                                     num_blocks);
454         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
455                                 sizeof(extent_item));
456
457         finish_current_insert(trans, extent_root);
458         pending_ret = del_pending_extents(trans, extent_root);
459         if (ret)
460                 return ret;
461         if (pending_ret)
462                 return pending_ret;
463         return 0;
464 }
465
466 /*
467  * helper function to allocate a block for a given tree
468  * returns the tree buffer or NULL.
469  */
470 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
471                                             struct btrfs_root *root)
472 {
473         struct btrfs_key ins;
474         int ret;
475         struct buffer_head *buf;
476
477         ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1,
478                 btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins);
479         if (ret) {
480                 BUG();
481                 return NULL;
482         }
483         buf = btrfs_find_create_tree_block(root, ins.objectid);
484         set_buffer_uptodate(buf);
485         return buf;
486 }
487
488 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
489                          struct btrfs_root *root, struct buffer_head *cur)
490 {
491         struct btrfs_disk_key *key;
492         struct btrfs_leaf *leaf;
493         struct btrfs_file_extent_item *fi;
494         int i;
495         int nritems;
496         int ret;
497
498         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
499         leaf = btrfs_buffer_leaf(cur);
500         nritems = btrfs_header_nritems(&leaf->header);
501         for (i = 0; i < nritems; i++) {
502                 key = &leaf->items[i].key;
503                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
504                         continue;
505                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
506                 /*
507                  * FIXME make sure to insert a trans record that
508                  * repeats the snapshot del on crash
509                  */
510                 ret = btrfs_free_extent(trans, root,
511                                         btrfs_file_extent_disk_blocknr(fi),
512                                         btrfs_file_extent_disk_num_blocks(fi),
513                                         0);
514                 BUG_ON(ret);
515         }
516         return 0;
517 }
518
519 /*
520  * helper function for drop_snapshot, this walks down the tree dropping ref
521  * counts as it goes.
522  */
523 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
524                           *root, struct btrfs_path *path, int *level)
525 {
526         struct buffer_head *next;
527         struct buffer_head *cur;
528         u64 blocknr;
529         int ret;
530         u32 refs;
531
532         ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr,
533                                1, &refs);
534         BUG_ON(ret);
535         if (refs > 1)
536                 goto out;
537         /*
538          * walk down to the last node level and free all the leaves
539          */
540         while(*level >= 0) {
541                 cur = path->nodes[*level];
542                 if (path->slots[*level] >=
543                     btrfs_header_nritems(btrfs_buffer_header(cur)))
544                         break;
545                 if (*level == 0) {
546                         ret = drop_leaf_ref(trans, root, cur);
547                         BUG_ON(ret);
548                         break;
549                 }
550                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
551                                               path->slots[*level]);
552                 ret = lookup_block_ref(trans, root, blocknr, 1, &refs);
553                 BUG_ON(ret);
554                 if (refs != 1) {
555                         path->slots[*level]++;
556                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
557                         BUG_ON(ret);
558                         continue;
559                 }
560                 next = read_tree_block(root, blocknr);
561                 if (path->nodes[*level-1])
562                         btrfs_block_release(root, path->nodes[*level-1]);
563                 path->nodes[*level-1] = next;
564                 *level = btrfs_header_level(btrfs_buffer_header(next));
565                 path->slots[*level] = 0;
566         }
567 out:
568         ret = btrfs_free_extent(trans, root,
569                                 path->nodes[*level]->b_blocknr, 1, 1);
570         btrfs_block_release(root, path->nodes[*level]);
571         path->nodes[*level] = NULL;
572         *level += 1;
573         BUG_ON(ret);
574         return 0;
575 }
576
577 /*
578  * helper for dropping snapshots.  This walks back up the tree in the path
579  * to find the first node higher up where we haven't yet gone through
580  * all the slots
581  */
582 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
583                         *root, struct btrfs_path *path, int *level)
584 {
585         int i;
586         int slot;
587         int ret;
588         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
589                 slot = path->slots[i];
590                 if (slot < btrfs_header_nritems(
591                     btrfs_buffer_header(path->nodes[i])) - 1) {
592                         path->slots[i]++;
593                         *level = i;
594                         return 0;
595                 } else {
596                         ret = btrfs_free_extent(trans, root,
597                                                 path->nodes[*level]->b_blocknr,
598                                                 1, 1);
599                         BUG_ON(ret);
600                         btrfs_block_release(root, path->nodes[*level]);
601                         path->nodes[*level] = NULL;
602                         *level = i + 1;
603                 }
604         }
605         return 1;
606 }
607
608 /*
609  * drop the reference count on the tree rooted at 'snap'.  This traverses
610  * the tree freeing any blocks that have a ref count of zero after being
611  * decremented.
612  */
613 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
614                         *root, struct buffer_head *snap)
615 {
616         int ret = 0;
617         int wret;
618         int level;
619         struct btrfs_path path;
620         int i;
621         int orig_level;
622
623         btrfs_init_path(&path);
624
625         level = btrfs_header_level(btrfs_buffer_header(snap));
626         orig_level = level;
627         path.nodes[level] = snap;
628         path.slots[level] = 0;
629         while(1) {
630                 wret = walk_down_tree(trans, root, &path, &level);
631                 if (wret > 0)
632                         break;
633                 if (wret < 0)
634                         ret = wret;
635
636                 wret = walk_up_tree(trans, root, &path, &level);
637                 if (wret > 0)
638                         break;
639                 if (wret < 0)
640                         ret = wret;
641         }
642         for (i = 0; i <= orig_level; i++) {
643                 if (path.nodes[i]) {
644                         btrfs_block_release(root, path.nodes[i]);
645                 }
646         }
647         return ret;
648 }