a0dfa2d6cb9bc956ebbc324c72d98fc8d0fbe034
[safe/jmp/linux-2.6] / fs / btrfs / ctree.c
1 #include <linux/module.h>
2 #include "ctree.h"
3 #include "disk-io.h"
4 #include "transaction.h"
5
6 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
7                       *root, struct btrfs_path *path, int level);
8 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
9                       *root, struct btrfs_path *path, int data_size);
10 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
11                           *root, struct buffer_head *dst, struct buffer_head
12                           *src);
13 static int balance_node_right(struct btrfs_trans_handle *trans, struct
14                               btrfs_root *root, struct buffer_head *dst_buf,
15                               struct buffer_head *src_buf);
16 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
17                    struct btrfs_path *path, int level, int slot);
18
19 inline void btrfs_init_path(struct btrfs_path *p)
20 {
21         memset(p, 0, sizeof(*p));
22 }
23
24 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
25 {
26         int i;
27         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
28                 if (!p->nodes[i])
29                         break;
30                 btrfs_block_release(root, p->nodes[i]);
31         }
32         memset(p, 0, sizeof(*p));
33 }
34
35 static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
36                            *root, struct buffer_head *buf, struct buffer_head
37                            *parent, int parent_slot, struct buffer_head
38                            **cow_ret)
39 {
40         struct buffer_head *cow;
41         struct btrfs_node *cow_node;
42
43         if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
44                                     trans->transid) {
45                 *cow_ret = buf;
46                 return 0;
47         }
48         cow = btrfs_alloc_free_block(trans, root);
49         cow_node = btrfs_buffer_node(cow);
50         memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
51         btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr);
52         btrfs_set_header_generation(&cow_node->header, trans->transid);
53         *cow_ret = cow;
54         btrfs_mark_buffer_dirty(cow);
55         btrfs_inc_ref(trans, root, buf);
56         if (buf == root->node) {
57                 root->node = cow;
58                 get_bh(cow);
59                 if (buf != root->commit_root)
60                         btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
61                 btrfs_block_release(root, buf);
62         } else {
63                 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
64                                         cow->b_blocknr);
65                 btrfs_mark_buffer_dirty(parent);
66                 btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
67         }
68         btrfs_block_release(root, buf);
69         return 0;
70 }
71
72 /*
73  * The leaf data grows from end-to-front in the node.
74  * this returns the address of the start of the last item,
75  * which is the stop of the leaf data stack
76  */
77 static inline unsigned int leaf_data_end(struct btrfs_root *root,
78                                          struct btrfs_leaf *leaf)
79 {
80         u32 nr = btrfs_header_nritems(&leaf->header);
81         if (nr == 0)
82                 return BTRFS_LEAF_DATA_SIZE(root);
83         return btrfs_item_offset(leaf->items + nr - 1);
84 }
85
86 /*
87  * The space between the end of the leaf items and
88  * the start of the leaf data.  IOW, how much room
89  * the leaf has left for both items and data
90  */
91 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
92 {
93         int data_end = leaf_data_end(root, leaf);
94         int nritems = btrfs_header_nritems(&leaf->header);
95         char *items_end = (char *)(leaf->items + nritems + 1);
96         return (char *)(btrfs_leaf_data(leaf) + data_end) - (char *)items_end;
97 }
98
99 /*
100  * compare two keys in a memcmp fashion
101  */
102 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
103 {
104         struct btrfs_key k1;
105
106         btrfs_disk_key_to_cpu(&k1, disk);
107
108         if (k1.objectid > k2->objectid)
109                 return 1;
110         if (k1.objectid < k2->objectid)
111                 return -1;
112         if (k1.offset > k2->offset)
113                 return 1;
114         if (k1.offset < k2->offset)
115                 return -1;
116         if (k1.flags > k2->flags)
117                 return 1;
118         if (k1.flags < k2->flags)
119                 return -1;
120         return 0;
121 }
122
123 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
124                       int level)
125 {
126         int i;
127         struct btrfs_node *parent = NULL;
128         struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
129         int parent_slot;
130         u32 nritems = btrfs_header_nritems(&node->header);
131
132         if (path->nodes[level + 1])
133                 parent = btrfs_buffer_node(path->nodes[level + 1]);
134         parent_slot = path->slots[level + 1];
135         BUG_ON(nritems == 0);
136         if (parent) {
137                 struct btrfs_disk_key *parent_key;
138                 parent_key = &parent->ptrs[parent_slot].key;
139                 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
140                               sizeof(struct btrfs_disk_key)));
141                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
142                        btrfs_header_blocknr(&node->header));
143         }
144         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
145         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
146                 struct btrfs_key cpukey;
147                 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
148                 BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
149         }
150         return 0;
151 }
152
153 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
154                       int level)
155 {
156         int i;
157         struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]);
158         struct btrfs_node *parent = NULL;
159         int parent_slot;
160         u32 nritems = btrfs_header_nritems(&leaf->header);
161
162         if (path->nodes[level + 1])
163                 parent = btrfs_buffer_node(path->nodes[level + 1]);
164         parent_slot = path->slots[level + 1];
165         BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
166
167         if (nritems == 0)
168                 return 0;
169
170         if (parent) {
171                 struct btrfs_disk_key *parent_key;
172                 parent_key = &parent->ptrs[parent_slot].key;
173                 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
174                        sizeof(struct btrfs_disk_key)));
175                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
176                        btrfs_header_blocknr(&leaf->header));
177         }
178         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
179                 struct btrfs_key cpukey;
180                 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
181                 BUG_ON(comp_keys(&leaf->items[i].key,
182                                  &cpukey) >= 0);
183                 BUG_ON(btrfs_item_offset(leaf->items + i) !=
184                         btrfs_item_end(leaf->items + i + 1));
185                 if (i == 0) {
186                         BUG_ON(btrfs_item_offset(leaf->items + i) +
187                                btrfs_item_size(leaf->items + i) !=
188                                BTRFS_LEAF_DATA_SIZE(root));
189                 }
190         }
191         return 0;
192 }
193
194 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
195                         int level)
196 {
197         if (level == 0)
198                 return check_leaf(root, path, level);
199         return check_node(root, path, level);
200 }
201
202 /*
203  * search for key in the array p.  items p are item_size apart
204  * and there are 'max' items in p
205  * the slot in the array is returned via slot, and it points to
206  * the place where you would insert key if it is not found in
207  * the array.
208  *
209  * slot may point to max if the key is bigger than all of the keys
210  */
211 static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
212                        int max, int *slot)
213 {
214         int low = 0;
215         int high = max;
216         int mid;
217         int ret;
218         struct btrfs_disk_key *tmp;
219
220         while(low < high) {
221                 mid = (low + high) / 2;
222                 tmp = (struct btrfs_disk_key *)(p + mid * item_size);
223                 ret = comp_keys(tmp, key);
224
225                 if (ret < 0)
226                         low = mid + 1;
227                 else if (ret > 0)
228                         high = mid;
229                 else {
230                         *slot = mid;
231                         return 0;
232                 }
233         }
234         *slot = low;
235         return 1;
236 }
237
238 /*
239  * simple bin_search frontend that does the right thing for
240  * leaves vs nodes
241  */
242 static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
243 {
244         if (btrfs_is_leaf(c)) {
245                 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
246                 return generic_bin_search((void *)l->items,
247                                           sizeof(struct btrfs_item),
248                                           key, btrfs_header_nritems(&c->header),
249                                           slot);
250         } else {
251                 return generic_bin_search((void *)c->ptrs,
252                                           sizeof(struct btrfs_key_ptr),
253                                           key, btrfs_header_nritems(&c->header),
254                                           slot);
255         }
256         return -1;
257 }
258
259 static struct buffer_head *read_node_slot(struct btrfs_root *root,
260                                    struct buffer_head *parent_buf,
261                                    int slot)
262 {
263         struct btrfs_node *node = btrfs_buffer_node(parent_buf);
264         if (slot < 0)
265                 return NULL;
266         if (slot >= btrfs_header_nritems(&node->header))
267                 return NULL;
268         return read_tree_block(root, btrfs_node_blockptr(node, slot));
269 }
270
271 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
272                          *root, struct btrfs_path *path, int level)
273 {
274         struct buffer_head *right_buf;
275         struct buffer_head *mid_buf;
276         struct buffer_head *left_buf;
277         struct buffer_head *parent_buf = NULL;
278         struct btrfs_node *right = NULL;
279         struct btrfs_node *mid;
280         struct btrfs_node *left = NULL;
281         struct btrfs_node *parent = NULL;
282         int ret = 0;
283         int wret;
284         int pslot;
285         int orig_slot = path->slots[level];
286         u64 orig_ptr;
287
288         if (level == 0)
289                 return 0;
290
291         mid_buf = path->nodes[level];
292         mid = btrfs_buffer_node(mid_buf);
293         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
294
295         if (level < BTRFS_MAX_LEVEL - 1)
296                 parent_buf = path->nodes[level + 1];
297         pslot = path->slots[level + 1];
298
299         /*
300          * deal with the case where there is only one pointer in the root
301          * by promoting the node below to a root
302          */
303         if (!parent_buf) {
304                 struct buffer_head *child;
305                 u64 blocknr = mid_buf->b_blocknr;
306
307                 if (btrfs_header_nritems(&mid->header) != 1)
308                         return 0;
309
310                 /* promote the child to a root */
311                 child = read_node_slot(root, mid_buf, 0);
312                 BUG_ON(!child);
313                 root->node = child;
314                 path->nodes[level] = NULL;
315                 clean_tree_block(trans, root, mid_buf);
316                 wait_on_buffer(mid_buf);
317                 /* once for the path */
318                 btrfs_block_release(root, mid_buf);
319                 /* once for the root ptr */
320                 btrfs_block_release(root, mid_buf);
321                 return btrfs_free_extent(trans, root, blocknr, 1, 1);
322         }
323         parent = btrfs_buffer_node(parent_buf);
324
325         if (btrfs_header_nritems(&mid->header) >
326             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
327                 return 0;
328
329         left_buf = read_node_slot(root, parent_buf, pslot - 1);
330         right_buf = read_node_slot(root, parent_buf, pslot + 1);
331
332         /* first, try to make some room in the middle buffer */
333         if (left_buf) {
334                 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
335                                 &left_buf);
336                 left = btrfs_buffer_node(left_buf);
337                 orig_slot += btrfs_header_nritems(&left->header);
338                 wret = push_node_left(trans, root, left_buf, mid_buf);
339                 if (wret < 0)
340                         ret = wret;
341         }
342
343         /*
344          * then try to empty the right most buffer into the middle
345          */
346         if (right_buf) {
347                 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
348                                 &right_buf);
349                 right = btrfs_buffer_node(right_buf);
350                 wret = push_node_left(trans, root, mid_buf, right_buf);
351                 if (wret < 0)
352                         ret = wret;
353                 if (btrfs_header_nritems(&right->header) == 0) {
354                         u64 blocknr = right_buf->b_blocknr;
355                         clean_tree_block(trans, root, right_buf);
356                         wait_on_buffer(right_buf);
357                         btrfs_block_release(root, right_buf);
358                         right_buf = NULL;
359                         right = NULL;
360                         wret = del_ptr(trans, root, path, level + 1, pslot +
361                                        1);
362                         if (wret)
363                                 ret = wret;
364                         wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
365                         if (wret)
366                                 ret = wret;
367                 } else {
368                         btrfs_memcpy(root, parent,
369                                      &parent->ptrs[pslot + 1].key,
370                                      &right->ptrs[0].key,
371                                      sizeof(struct btrfs_disk_key));
372                         btrfs_mark_buffer_dirty(parent_buf);
373                 }
374         }
375         if (btrfs_header_nritems(&mid->header) == 1) {
376                 /*
377                  * we're not allowed to leave a node with one item in the
378                  * tree during a delete.  A deletion from lower in the tree
379                  * could try to delete the only pointer in this node.
380                  * So, pull some keys from the left.
381                  * There has to be a left pointer at this point because
382                  * otherwise we would have pulled some pointers from the
383                  * right
384                  */
385                 BUG_ON(!left_buf);
386                 wret = balance_node_right(trans, root, mid_buf, left_buf);
387                 if (wret < 0)
388                         ret = wret;
389                 BUG_ON(wret == 1);
390         }
391         if (btrfs_header_nritems(&mid->header) == 0) {
392                 /* we've managed to empty the middle node, drop it */
393                 u64 blocknr = mid_buf->b_blocknr;
394                 clean_tree_block(trans, root, mid_buf);
395                 wait_on_buffer(mid_buf);
396                 btrfs_block_release(root, mid_buf);
397                 mid_buf = NULL;
398                 mid = NULL;
399                 wret = del_ptr(trans, root, path, level + 1, pslot);
400                 if (wret)
401                         ret = wret;
402                 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
403                 if (wret)
404                         ret = wret;
405         } else {
406                 /* update the parent key to reflect our changes */
407                 btrfs_memcpy(root, parent,
408                              &parent->ptrs[pslot].key, &mid->ptrs[0].key,
409                              sizeof(struct btrfs_disk_key));
410                 btrfs_mark_buffer_dirty(parent_buf);
411         }
412
413         /* update the path */
414         if (left_buf) {
415                 if (btrfs_header_nritems(&left->header) > orig_slot) {
416                         get_bh(left_buf);
417                         path->nodes[level] = left_buf;
418                         path->slots[level + 1] -= 1;
419                         path->slots[level] = orig_slot;
420                         if (mid_buf)
421                                 btrfs_block_release(root, mid_buf);
422                 } else {
423                         orig_slot -= btrfs_header_nritems(&left->header);
424                         path->slots[level] = orig_slot;
425                 }
426         }
427         /* double check we haven't messed things up */
428         check_block(root, path, level);
429         if (orig_ptr !=
430             btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]),
431                                 path->slots[level]))
432                 BUG();
433
434         if (right_buf)
435                 btrfs_block_release(root, right_buf);
436         if (left_buf)
437                 btrfs_block_release(root, left_buf);
438         return ret;
439 }
440
441 /*
442  * look for key in the tree.  path is filled in with nodes along the way
443  * if key is found, we return zero and you can find the item in the leaf
444  * level of the path (level 0)
445  *
446  * If the key isn't found, the path points to the slot where it should
447  * be inserted, and 1 is returned.  If there are other errors during the
448  * search a negative error number is returned.
449  *
450  * if ins_len > 0, nodes and leaves will be split as we walk down the
451  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
452  * possible)
453  */
454 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
455                       *root, struct btrfs_key *key, struct btrfs_path *p, int
456                       ins_len, int cow)
457 {
458         struct buffer_head *b;
459         struct buffer_head *cow_buf;
460         struct btrfs_node *c;
461         int slot;
462         int ret;
463         int level;
464
465         WARN_ON(p->nodes[0] != NULL);
466         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
467 again:
468         b = root->node;
469         get_bh(b);
470         while (b) {
471                 c = btrfs_buffer_node(b);
472                 level = btrfs_header_level(&c->header);
473                 if (cow) {
474                         int wret;
475                         wret = btrfs_cow_block(trans, root, b,
476                                                p->nodes[level + 1],
477                                                p->slots[level + 1],
478                                                &cow_buf);
479                         b = cow_buf;
480                 }
481                 BUG_ON(!cow && ins_len);
482                 c = btrfs_buffer_node(b);
483                 p->nodes[level] = b;
484                 ret = check_block(root, p, level);
485                 if (ret)
486                         return -1;
487                 ret = bin_search(c, key, &slot);
488                 if (!btrfs_is_leaf(c)) {
489                         if (ret && slot > 0)
490                                 slot -= 1;
491                         p->slots[level] = slot;
492                         if (ins_len > 0 && btrfs_header_nritems(&c->header) ==
493                             BTRFS_NODEPTRS_PER_BLOCK(root)) {
494                                 int sret = split_node(trans, root, p, level);
495                                 BUG_ON(sret > 0);
496                                 if (sret)
497                                         return sret;
498                                 b = p->nodes[level];
499                                 c = btrfs_buffer_node(b);
500                                 slot = p->slots[level];
501                         } else if (ins_len < 0) {
502                                 int sret = balance_level(trans, root, p,
503                                                          level);
504                                 if (sret)
505                                         return sret;
506                                 b = p->nodes[level];
507                                 if (!b)
508                                         goto again;
509                                 c = btrfs_buffer_node(b);
510                                 slot = p->slots[level];
511                                 BUG_ON(btrfs_header_nritems(&c->header) == 1);
512                         }
513                         b = read_tree_block(root, btrfs_node_blockptr(c, slot));
514                 } else {
515                         struct btrfs_leaf *l = (struct btrfs_leaf *)c;
516                         p->slots[level] = slot;
517                         if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
518                             sizeof(struct btrfs_item) + ins_len) {
519                                 int sret = split_leaf(trans, root, p, ins_len);
520                                 BUG_ON(sret > 0);
521                                 if (sret)
522                                         return sret;
523                         }
524                         return ret;
525                 }
526         }
527         return 1;
528 }
529
530 /*
531  * adjust the pointers going up the tree, starting at level
532  * making sure the right key of each node is points to 'key'.
533  * This is used after shifting pointers to the left, so it stops
534  * fixing up pointers when a given leaf/node is not in slot 0 of the
535  * higher levels
536  *
537  * If this fails to write a tree block, it returns -1, but continues
538  * fixing up the blocks in ram so the tree is consistent.
539  */
540 static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
541                           *root, struct btrfs_path *path, struct btrfs_disk_key
542                           *key, int level)
543 {
544         int i;
545         int ret = 0;
546         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
547                 struct btrfs_node *t;
548                 int tslot = path->slots[i];
549                 if (!path->nodes[i])
550                         break;
551                 t = btrfs_buffer_node(path->nodes[i]);
552                 btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key));
553                 btrfs_mark_buffer_dirty(path->nodes[i]);
554                 if (tslot != 0)
555                         break;
556         }
557         return ret;
558 }
559
560 /*
561  * try to push data from one node into the next node left in the
562  * tree.
563  *
564  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
565  * error, and > 0 if there was no room in the left hand block.
566  */
567 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
568                           *root, struct buffer_head *dst_buf, struct
569                           buffer_head *src_buf)
570 {
571         struct btrfs_node *src = btrfs_buffer_node(src_buf);
572         struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
573         int push_items = 0;
574         int src_nritems;
575         int dst_nritems;
576         int ret = 0;
577
578         src_nritems = btrfs_header_nritems(&src->header);
579         dst_nritems = btrfs_header_nritems(&dst->header);
580         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
581         if (push_items <= 0) {
582                 return 1;
583         }
584
585         if (src_nritems < push_items)
586                 push_items = src_nritems;
587
588         btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs,
589                      push_items * sizeof(struct btrfs_key_ptr));
590         if (push_items < src_nritems) {
591                 btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items,
592                         (src_nritems - push_items) *
593                         sizeof(struct btrfs_key_ptr));
594         }
595         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
596         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
597         btrfs_mark_buffer_dirty(src_buf);
598         btrfs_mark_buffer_dirty(dst_buf);
599         return ret;
600 }
601
602 /*
603  * try to push data from one node into the next node right in the
604  * tree.
605  *
606  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
607  * error, and > 0 if there was no room in the right hand block.
608  *
609  * this will  only push up to 1/2 the contents of the left node over
610  */
611 static int balance_node_right(struct btrfs_trans_handle *trans, struct
612                               btrfs_root *root, struct buffer_head *dst_buf,
613                               struct buffer_head *src_buf)
614 {
615         struct btrfs_node *src = btrfs_buffer_node(src_buf);
616         struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
617         int push_items = 0;
618         int max_push;
619         int src_nritems;
620         int dst_nritems;
621         int ret = 0;
622
623         src_nritems = btrfs_header_nritems(&src->header);
624         dst_nritems = btrfs_header_nritems(&dst->header);
625         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
626         if (push_items <= 0) {
627                 return 1;
628         }
629
630         max_push = src_nritems / 2 + 1;
631         /* don't try to empty the node */
632         if (max_push > src_nritems)
633                 return 1;
634         if (max_push < push_items)
635                 push_items = max_push;
636
637         btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs,
638                       dst_nritems * sizeof(struct btrfs_key_ptr));
639
640         btrfs_memcpy(root, dst, dst->ptrs,
641                      src->ptrs + src_nritems - push_items,
642                      push_items * sizeof(struct btrfs_key_ptr));
643
644         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
645         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
646
647         btrfs_mark_buffer_dirty(src_buf);
648         btrfs_mark_buffer_dirty(dst_buf);
649         return ret;
650 }
651
652 /*
653  * helper function to insert a new root level in the tree.
654  * A new node is allocated, and a single item is inserted to
655  * point to the existing root
656  *
657  * returns zero on success or < 0 on failure.
658  */
659 static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
660                            *root, struct btrfs_path *path, int level)
661 {
662         struct buffer_head *t;
663         struct btrfs_node *lower;
664         struct btrfs_node *c;
665         struct btrfs_disk_key *lower_key;
666
667         BUG_ON(path->nodes[level]);
668         BUG_ON(path->nodes[level-1] != root->node);
669
670         t = btrfs_alloc_free_block(trans, root);
671         c = btrfs_buffer_node(t);
672         memset(c, 0, root->blocksize);
673         btrfs_set_header_nritems(&c->header, 1);
674         btrfs_set_header_level(&c->header, level);
675         btrfs_set_header_blocknr(&c->header, t->b_blocknr);
676         btrfs_set_header_generation(&c->header, trans->transid);
677         btrfs_set_header_parentid(&c->header,
678               btrfs_header_parentid(btrfs_buffer_header(root->node)));
679         lower = btrfs_buffer_node(path->nodes[level-1]);
680         if (btrfs_is_leaf(lower))
681                 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
682         else
683                 lower_key = &lower->ptrs[0].key;
684         btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key,
685                      sizeof(struct btrfs_disk_key));
686         btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr);
687
688         btrfs_mark_buffer_dirty(t);
689
690         /* the super has an extra ref to root->node */
691         btrfs_block_release(root, root->node);
692         root->node = t;
693         get_bh(t);
694         path->nodes[level] = t;
695         path->slots[level] = 0;
696         return 0;
697 }
698
699 /*
700  * worker function to insert a single pointer in a node.
701  * the node should have enough room for the pointer already
702  *
703  * slot and level indicate where you want the key to go, and
704  * blocknr is the block the key points to.
705  *
706  * returns zero on success and < 0 on any error
707  */
708 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
709                       *root, struct btrfs_path *path, struct btrfs_disk_key
710                       *key, u64 blocknr, int slot, int level)
711 {
712         struct btrfs_node *lower;
713         int nritems;
714
715         BUG_ON(!path->nodes[level]);
716         lower = btrfs_buffer_node(path->nodes[level]);
717         nritems = btrfs_header_nritems(&lower->header);
718         if (slot > nritems)
719                 BUG();
720         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
721                 BUG();
722         if (slot != nritems) {
723                 btrfs_memmove(root, lower, lower->ptrs + slot + 1,
724                               lower->ptrs + slot,
725                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
726         }
727         btrfs_memcpy(root, lower, &lower->ptrs[slot].key,
728                      key, sizeof(struct btrfs_disk_key));
729         btrfs_set_node_blockptr(lower, slot, blocknr);
730         btrfs_set_header_nritems(&lower->header, nritems + 1);
731         btrfs_mark_buffer_dirty(path->nodes[level]);
732         return 0;
733 }
734
735 /*
736  * split the node at the specified level in path in two.
737  * The path is corrected to point to the appropriate node after the split
738  *
739  * Before splitting this tries to make some room in the node by pushing
740  * left and right, if either one works, it returns right away.
741  *
742  * returns 0 on success and < 0 on failure
743  */
744 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
745                       *root, struct btrfs_path *path, int level)
746 {
747         struct buffer_head *t;
748         struct btrfs_node *c;
749         struct buffer_head *split_buffer;
750         struct btrfs_node *split;
751         int mid;
752         int ret;
753         int wret;
754         u32 c_nritems;
755
756         t = path->nodes[level];
757         c = btrfs_buffer_node(t);
758         if (t == root->node) {
759                 /* trying to split the root, lets make a new one */
760                 ret = insert_new_root(trans, root, path, level + 1);
761                 if (ret)
762                         return ret;
763         }
764         c_nritems = btrfs_header_nritems(&c->header);
765         split_buffer = btrfs_alloc_free_block(trans, root);
766         split = btrfs_buffer_node(split_buffer);
767         btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
768         btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
769         btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr);
770         btrfs_set_header_generation(&split->header, trans->transid);
771         btrfs_set_header_parentid(&split->header,
772               btrfs_header_parentid(btrfs_buffer_header(root->node)));
773         mid = (c_nritems + 1) / 2;
774         btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid,
775                      (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
776         btrfs_set_header_nritems(&split->header, c_nritems - mid);
777         btrfs_set_header_nritems(&c->header, mid);
778         ret = 0;
779
780         btrfs_mark_buffer_dirty(t);
781         btrfs_mark_buffer_dirty(split_buffer);
782         wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
783                           split_buffer->b_blocknr, path->slots[level + 1] + 1,
784                           level + 1);
785         if (wret)
786                 ret = wret;
787
788         if (path->slots[level] >= mid) {
789                 path->slots[level] -= mid;
790                 btrfs_block_release(root, t);
791                 path->nodes[level] = split_buffer;
792                 path->slots[level + 1] += 1;
793         } else {
794                 btrfs_block_release(root, split_buffer);
795         }
796         return ret;
797 }
798
799 /*
800  * how many bytes are required to store the items in a leaf.  start
801  * and nr indicate which items in the leaf to check.  This totals up the
802  * space used both by the item structs and the item data
803  */
804 static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
805 {
806         int data_len;
807         int end = start + nr - 1;
808
809         if (!nr)
810                 return 0;
811         data_len = btrfs_item_end(l->items + start);
812         data_len = data_len - btrfs_item_offset(l->items + end);
813         data_len += sizeof(struct btrfs_item) * nr;
814         return data_len;
815 }
816
817 /*
818  * push some data in the path leaf to the right, trying to free up at
819  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
820  *
821  * returns 1 if the push failed because the other node didn't have enough
822  * room, 0 if everything worked out and < 0 if there were major errors.
823  */
824 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
825                            *root, struct btrfs_path *path, int data_size)
826 {
827         struct buffer_head *left_buf = path->nodes[0];
828         struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf);
829         struct btrfs_leaf *right;
830         struct buffer_head *right_buf;
831         struct buffer_head *upper;
832         struct btrfs_node *upper_node;
833         int slot;
834         int i;
835         int free_space;
836         int push_space = 0;
837         int push_items = 0;
838         struct btrfs_item *item;
839         u32 left_nritems;
840         u32 right_nritems;
841
842         slot = path->slots[1];
843         if (!path->nodes[1]) {
844                 return 1;
845         }
846         upper = path->nodes[1];
847         upper_node = btrfs_buffer_node(upper);
848         if (slot >= btrfs_header_nritems(&upper_node->header) - 1) {
849                 return 1;
850         }
851         right_buf = read_tree_block(root,
852                     btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1));
853         right = btrfs_buffer_leaf(right_buf);
854         free_space = btrfs_leaf_free_space(root, right);
855         if (free_space < data_size + sizeof(struct btrfs_item)) {
856                 btrfs_block_release(root, right_buf);
857                 return 1;
858         }
859         /* cow and double check */
860         btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
861         right = btrfs_buffer_leaf(right_buf);
862         free_space = btrfs_leaf_free_space(root, right);
863         if (free_space < data_size + sizeof(struct btrfs_item)) {
864                 btrfs_block_release(root, right_buf);
865                 return 1;
866         }
867
868         left_nritems = btrfs_header_nritems(&left->header);
869         for (i = left_nritems - 1; i >= 0; i--) {
870                 item = left->items + i;
871                 if (path->slots[0] == i)
872                         push_space += data_size + sizeof(*item);
873                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
874                     free_space)
875                         break;
876                 push_items++;
877                 push_space += btrfs_item_size(item) + sizeof(*item);
878         }
879         if (push_items == 0) {
880                 btrfs_block_release(root, right_buf);
881                 return 1;
882         }
883         right_nritems = btrfs_header_nritems(&right->header);
884         /* push left to right */
885         push_space = btrfs_item_end(left->items + left_nritems - push_items);
886         push_space -= leaf_data_end(root, left);
887         /* make room in the right data area */
888         btrfs_memmove(root, right, btrfs_leaf_data(right) +
889                       leaf_data_end(root, right) - push_space,
890                       btrfs_leaf_data(right) +
891                       leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) -
892                       leaf_data_end(root, right));
893         /* copy from the left data area */
894         btrfs_memcpy(root, right, btrfs_leaf_data(right) +
895                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
896                      btrfs_leaf_data(left) + leaf_data_end(root, left),
897                      push_space);
898         btrfs_memmove(root, right, right->items + push_items, right->items,
899                 right_nritems * sizeof(struct btrfs_item));
900         /* copy the items from left to right */
901         btrfs_memcpy(root, right, right->items, left->items +
902                      left_nritems - push_items,
903                      push_items * sizeof(struct btrfs_item));
904
905         /* update the item pointers */
906         right_nritems += push_items;
907         btrfs_set_header_nritems(&right->header, right_nritems);
908         push_space = BTRFS_LEAF_DATA_SIZE(root);
909         for (i = 0; i < right_nritems; i++) {
910                 btrfs_set_item_offset(right->items + i, push_space -
911                                       btrfs_item_size(right->items + i));
912                 push_space = btrfs_item_offset(right->items + i);
913         }
914         left_nritems -= push_items;
915         btrfs_set_header_nritems(&left->header, left_nritems);
916
917         btrfs_mark_buffer_dirty(left_buf);
918         btrfs_mark_buffer_dirty(right_buf);
919         btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key,
920                 &right->items[0].key, sizeof(struct btrfs_disk_key));
921         btrfs_mark_buffer_dirty(upper);
922
923         /* then fixup the leaf pointer in the path */
924         if (path->slots[0] >= left_nritems) {
925                 path->slots[0] -= left_nritems;
926                 btrfs_block_release(root, path->nodes[0]);
927                 path->nodes[0] = right_buf;
928                 path->slots[1] += 1;
929         } else {
930                 btrfs_block_release(root, right_buf);
931         }
932         return 0;
933 }
934 /*
935  * push some data in the path leaf to the left, trying to free up at
936  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
937  */
938 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
939                           *root, struct btrfs_path *path, int data_size)
940 {
941         struct buffer_head *right_buf = path->nodes[0];
942         struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf);
943         struct buffer_head *t;
944         struct btrfs_leaf *left;
945         int slot;
946         int i;
947         int free_space;
948         int push_space = 0;
949         int push_items = 0;
950         struct btrfs_item *item;
951         u32 old_left_nritems;
952         int ret = 0;
953         int wret;
954
955         slot = path->slots[1];
956         if (slot == 0) {
957                 return 1;
958         }
959         if (!path->nodes[1]) {
960                 return 1;
961         }
962         t = read_tree_block(root,
963             btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1));
964         left = btrfs_buffer_leaf(t);
965         free_space = btrfs_leaf_free_space(root, left);
966         if (free_space < data_size + sizeof(struct btrfs_item)) {
967                 btrfs_block_release(root, t);
968                 return 1;
969         }
970
971         /* cow and double check */
972         btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
973         left = btrfs_buffer_leaf(t);
974         free_space = btrfs_leaf_free_space(root, left);
975         if (free_space < data_size + sizeof(struct btrfs_item)) {
976                 btrfs_block_release(root, t);
977                 return 1;
978         }
979
980         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
981                 item = right->items + i;
982                 if (path->slots[0] == i)
983                         push_space += data_size + sizeof(*item);
984                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
985                     free_space)
986                         break;
987                 push_items++;
988                 push_space += btrfs_item_size(item) + sizeof(*item);
989         }
990         if (push_items == 0) {
991                 btrfs_block_release(root, t);
992                 return 1;
993         }
994         /* push data from right to left */
995         btrfs_memcpy(root, left, left->items +
996                      btrfs_header_nritems(&left->header),
997                      right->items, push_items * sizeof(struct btrfs_item));
998         push_space = BTRFS_LEAF_DATA_SIZE(root) -
999                      btrfs_item_offset(right->items + push_items -1);
1000         btrfs_memcpy(root, left, btrfs_leaf_data(left) +
1001                      leaf_data_end(root, left) - push_space,
1002                      btrfs_leaf_data(right) +
1003                      btrfs_item_offset(right->items + push_items - 1),
1004                      push_space);
1005         old_left_nritems = btrfs_header_nritems(&left->header);
1006         BUG_ON(old_left_nritems < 0);
1007
1008         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1009                 u32 ioff = btrfs_item_offset(left->items + i);
1010                 btrfs_set_item_offset(left->items + i, ioff -
1011                                      (BTRFS_LEAF_DATA_SIZE(root) -
1012                                       btrfs_item_offset(left->items +
1013                                                         old_left_nritems - 1)));
1014         }
1015         btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
1016
1017         /* fixup right node */
1018         push_space = btrfs_item_offset(right->items + push_items - 1) -
1019                      leaf_data_end(root, right);
1020         btrfs_memmove(root, right, btrfs_leaf_data(right) +
1021                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1022                       btrfs_leaf_data(right) +
1023                       leaf_data_end(root, right), push_space);
1024         btrfs_memmove(root, right, right->items, right->items + push_items,
1025                 (btrfs_header_nritems(&right->header) - push_items) *
1026                 sizeof(struct btrfs_item));
1027         btrfs_set_header_nritems(&right->header,
1028                                  btrfs_header_nritems(&right->header) -
1029                                  push_items);
1030         push_space = BTRFS_LEAF_DATA_SIZE(root);
1031
1032         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1033                 btrfs_set_item_offset(right->items + i, push_space -
1034                                       btrfs_item_size(right->items + i));
1035                 push_space = btrfs_item_offset(right->items + i);
1036         }
1037
1038         btrfs_mark_buffer_dirty(t);
1039         btrfs_mark_buffer_dirty(right_buf);
1040
1041         wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
1042         if (wret)
1043                 ret = wret;
1044
1045         /* then fixup the leaf pointer in the path */
1046         if (path->slots[0] < push_items) {
1047                 path->slots[0] += old_left_nritems;
1048                 btrfs_block_release(root, path->nodes[0]);
1049                 path->nodes[0] = t;
1050                 path->slots[1] -= 1;
1051         } else {
1052                 btrfs_block_release(root, t);
1053                 path->slots[0] -= push_items;
1054         }
1055         BUG_ON(path->slots[0] < 0);
1056         return ret;
1057 }
1058
1059 /*
1060  * split the path's leaf in two, making sure there is at least data_size
1061  * available for the resulting leaf level of the path.
1062  *
1063  * returns 0 if all went well and < 0 on failure.
1064  */
1065 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1066                       *root, struct btrfs_path *path, int data_size)
1067 {
1068         struct buffer_head *l_buf;
1069         struct btrfs_leaf *l;
1070         u32 nritems;
1071         int mid;
1072         int slot;
1073         struct btrfs_leaf *right;
1074         struct buffer_head *right_buffer;
1075         int space_needed = data_size + sizeof(struct btrfs_item);
1076         int data_copy_size;
1077         int rt_data_off;
1078         int i;
1079         int ret;
1080         int wret;
1081
1082         /* first try to make some room by pushing left and right */
1083         wret = push_leaf_left(trans, root, path, data_size);
1084         if (wret < 0)
1085                 return wret;
1086         if (wret) {
1087                 wret = push_leaf_right(trans, root, path, data_size);
1088                 if (wret < 0)
1089                         return wret;
1090         }
1091         l_buf = path->nodes[0];
1092         l = btrfs_buffer_leaf(l_buf);
1093
1094         /* did the pushes work? */
1095         if (btrfs_leaf_free_space(root, l) >=
1096             sizeof(struct btrfs_item) + data_size)
1097                 return 0;
1098
1099         if (!path->nodes[1]) {
1100                 ret = insert_new_root(trans, root, path, 1);
1101                 if (ret)
1102                         return ret;
1103         }
1104         slot = path->slots[0];
1105         nritems = btrfs_header_nritems(&l->header);
1106         mid = (nritems + 1)/ 2;
1107         right_buffer = btrfs_alloc_free_block(trans, root);
1108         BUG_ON(!right_buffer);
1109         BUG_ON(mid == nritems);
1110         right = btrfs_buffer_leaf(right_buffer);
1111         memset(&right->header, 0, sizeof(right->header));
1112         if (mid <= slot) {
1113                 /* FIXME, just alloc a new leaf here */
1114                 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
1115                         BTRFS_LEAF_DATA_SIZE(root))
1116                         BUG();
1117         } else {
1118                 /* FIXME, just alloc a new leaf here */
1119                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1120                         BTRFS_LEAF_DATA_SIZE(root))
1121                         BUG();
1122         }
1123         btrfs_set_header_nritems(&right->header, nritems - mid);
1124         btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr);
1125         btrfs_set_header_generation(&right->header, trans->transid);
1126         btrfs_set_header_level(&right->header, 0);
1127         btrfs_set_header_parentid(&right->header,
1128               btrfs_header_parentid(btrfs_buffer_header(root->node)));
1129         data_copy_size = btrfs_item_end(l->items + mid) -
1130                          leaf_data_end(root, l);
1131         btrfs_memcpy(root, right, right->items, l->items + mid,
1132                      (nritems - mid) * sizeof(struct btrfs_item));
1133         btrfs_memcpy(root, right,
1134                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1135                      data_copy_size, btrfs_leaf_data(l) +
1136                      leaf_data_end(root, l), data_copy_size);
1137         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1138                       btrfs_item_end(l->items + mid);
1139
1140         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1141                 u32 ioff = btrfs_item_offset(right->items + i);
1142                 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1143         }
1144
1145         btrfs_set_header_nritems(&l->header, mid);
1146         ret = 0;
1147         wret = insert_ptr(trans, root, path, &right->items[0].key,
1148                           right_buffer->b_blocknr, path->slots[1] + 1, 1);
1149         if (wret)
1150                 ret = wret;
1151         btrfs_mark_buffer_dirty(right_buffer);
1152         btrfs_mark_buffer_dirty(l_buf);
1153         BUG_ON(path->slots[0] != slot);
1154         if (mid <= slot) {
1155                 btrfs_block_release(root, path->nodes[0]);
1156                 path->nodes[0] = right_buffer;
1157                 path->slots[0] -= mid;
1158                 path->slots[1] += 1;
1159         } else
1160                 btrfs_block_release(root, right_buffer);
1161         BUG_ON(path->slots[0] < 0);
1162         return ret;
1163 }
1164
1165 /*
1166  * Given a key and some data, insert an item into the tree.
1167  * This does all the path init required, making room in the tree if needed.
1168  */
1169 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1170                             *root, struct btrfs_path *path, struct btrfs_key
1171                             *cpu_key, u32 data_size)
1172 {
1173         int ret = 0;
1174         int slot;
1175         int slot_orig;
1176         struct btrfs_leaf *leaf;
1177         struct buffer_head *leaf_buf;
1178         u32 nritems;
1179         unsigned int data_end;
1180         struct btrfs_disk_key disk_key;
1181
1182         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
1183
1184         /* create a root if there isn't one */
1185         if (!root->node)
1186                 BUG();
1187         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
1188         if (ret == 0) {
1189                 return -EEXIST;
1190         }
1191         if (ret < 0)
1192                 goto out;
1193
1194         slot_orig = path->slots[0];
1195         leaf_buf = path->nodes[0];
1196         leaf = btrfs_buffer_leaf(leaf_buf);
1197
1198         nritems = btrfs_header_nritems(&leaf->header);
1199         data_end = leaf_data_end(root, leaf);
1200
1201         if (btrfs_leaf_free_space(root, leaf) <
1202             sizeof(struct btrfs_item) + data_size)
1203                 BUG();
1204
1205         slot = path->slots[0];
1206         BUG_ON(slot < 0);
1207         if (slot != nritems) {
1208                 int i;
1209                 unsigned int old_data = btrfs_item_end(leaf->items + slot);
1210
1211                 /*
1212                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
1213                  */
1214                 /* first correct the data pointers */
1215                 for (i = slot; i < nritems; i++) {
1216                         u32 ioff = btrfs_item_offset(leaf->items + i);
1217                         btrfs_set_item_offset(leaf->items + i,
1218                                               ioff - data_size);
1219                 }
1220
1221                 /* shift the items */
1222                 btrfs_memmove(root, leaf, leaf->items + slot + 1,
1223                               leaf->items + slot,
1224                               (nritems - slot) * sizeof(struct btrfs_item));
1225
1226                 /* shift the data */
1227                 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1228                               data_end - data_size, btrfs_leaf_data(leaf) +
1229                               data_end, old_data - data_end);
1230                 data_end = old_data;
1231         }
1232         /* setup the item for the new data */
1233         btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key,
1234                      sizeof(struct btrfs_disk_key));
1235         btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1236         btrfs_set_item_size(leaf->items + slot, data_size);
1237         btrfs_set_header_nritems(&leaf->header, nritems + 1);
1238         btrfs_mark_buffer_dirty(leaf_buf);
1239
1240         ret = 0;
1241         if (slot == 0)
1242                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
1243
1244         if (btrfs_leaf_free_space(root, leaf) < 0)
1245                 BUG();
1246         check_leaf(root, path, 0);
1247 out:
1248         return ret;
1249 }
1250
1251 /*
1252  * Given a key and some data, insert an item into the tree.
1253  * This does all the path init required, making room in the tree if needed.
1254  */
1255 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1256                       *root, struct btrfs_key *cpu_key, void *data, u32
1257                       data_size)
1258 {
1259         int ret = 0;
1260         struct btrfs_path path;
1261         u8 *ptr;
1262
1263         btrfs_init_path(&path);
1264         ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size);
1265         if (!ret) {
1266                 ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
1267                                      path.slots[0], u8);
1268                 btrfs_memcpy(root, path.nodes[0]->b_data,
1269                              ptr, data, data_size);
1270                 btrfs_mark_buffer_dirty(path.nodes[0]);
1271         }
1272         btrfs_release_path(root, &path);
1273         return ret;
1274 }
1275
1276 /*
1277  * delete the pointer from a given node.
1278  *
1279  * If the delete empties a node, the node is removed from the tree,
1280  * continuing all the way the root if required.  The root is converted into
1281  * a leaf if all the nodes are emptied.
1282  */
1283 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1284                    struct btrfs_path *path, int level, int slot)
1285 {
1286         struct btrfs_node *node;
1287         struct buffer_head *parent = path->nodes[level];
1288         u32 nritems;
1289         int ret = 0;
1290         int wret;
1291
1292         node = btrfs_buffer_node(parent);
1293         nritems = btrfs_header_nritems(&node->header);
1294         if (slot != nritems -1) {
1295                 btrfs_memmove(root, node, node->ptrs + slot,
1296                               node->ptrs + slot + 1,
1297                               sizeof(struct btrfs_key_ptr) *
1298                               (nritems - slot - 1));
1299         }
1300         nritems--;
1301         btrfs_set_header_nritems(&node->header, nritems);
1302         if (nritems == 0 && parent == root->node) {
1303                 struct btrfs_header *header = btrfs_buffer_header(root->node);
1304                 BUG_ON(btrfs_header_level(header) != 1);
1305                 /* just turn the root into a leaf and break */
1306                 btrfs_set_header_level(header, 0);
1307         } else if (slot == 0) {
1308                 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
1309                                       level + 1);
1310                 if (wret)
1311                         ret = wret;
1312         }
1313         btrfs_mark_buffer_dirty(parent);
1314         return ret;
1315 }
1316
1317 /*
1318  * delete the item at the leaf level in path.  If that empties
1319  * the leaf, remove it from the tree
1320  */
1321 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1322                    struct btrfs_path *path)
1323 {
1324         int slot;
1325         struct btrfs_leaf *leaf;
1326         struct buffer_head *leaf_buf;
1327         int doff;
1328         int dsize;
1329         int ret = 0;
1330         int wret;
1331         u32 nritems;
1332
1333         leaf_buf = path->nodes[0];
1334         leaf = btrfs_buffer_leaf(leaf_buf);
1335         slot = path->slots[0];
1336         doff = btrfs_item_offset(leaf->items + slot);
1337         dsize = btrfs_item_size(leaf->items + slot);
1338         nritems = btrfs_header_nritems(&leaf->header);
1339
1340         if (slot != nritems - 1) {
1341                 int i;
1342                 int data_end = leaf_data_end(root, leaf);
1343                 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1344                               data_end + dsize,
1345                               btrfs_leaf_data(leaf) + data_end,
1346                               doff - data_end);
1347                 for (i = slot + 1; i < nritems; i++) {
1348                         u32 ioff = btrfs_item_offset(leaf->items + i);
1349                         btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1350                 }
1351                 btrfs_memmove(root, leaf, leaf->items + slot,
1352                               leaf->items + slot + 1,
1353                               sizeof(struct btrfs_item) *
1354                               (nritems - slot - 1));
1355         }
1356         btrfs_set_header_nritems(&leaf->header, nritems - 1);
1357         nritems--;
1358         /* delete the leaf if we've emptied it */
1359         if (nritems == 0) {
1360                 if (leaf_buf == root->node) {
1361                         btrfs_set_header_level(&leaf->header, 0);
1362                 } else {
1363                         clean_tree_block(trans, root, leaf_buf);
1364                         wait_on_buffer(leaf_buf);
1365                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
1366                         if (wret)
1367                                 ret = wret;
1368                         wret = btrfs_free_extent(trans, root,
1369                                                  leaf_buf->b_blocknr, 1, 1);
1370                         if (wret)
1371                                 ret = wret;
1372                 }
1373         } else {
1374                 int used = leaf_space_used(leaf, 0, nritems);
1375                 if (slot == 0) {
1376                         wret = fixup_low_keys(trans, root, path,
1377                                               &leaf->items[0].key, 1);
1378                         if (wret)
1379                                 ret = wret;
1380                 }
1381
1382                 /* delete the leaf if it is mostly empty */
1383                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
1384                         /* push_leaf_left fixes the path.
1385                          * make sure the path still points to our leaf
1386                          * for possible call to del_ptr below
1387                          */
1388                         slot = path->slots[1];
1389                         get_bh(leaf_buf);
1390                         wret = push_leaf_left(trans, root, path, 1);
1391                         if (wret < 0)
1392                                 ret = wret;
1393                         if (path->nodes[0] == leaf_buf &&
1394                             btrfs_header_nritems(&leaf->header)) {
1395                                 wret = push_leaf_right(trans, root, path, 1);
1396                                 if (wret < 0)
1397                                         ret = wret;
1398                         }
1399                         if (btrfs_header_nritems(&leaf->header) == 0) {
1400                                 u64 blocknr = leaf_buf->b_blocknr;
1401                                 clean_tree_block(trans, root, leaf_buf);
1402                                 wait_on_buffer(leaf_buf);
1403                                 wret = del_ptr(trans, root, path, 1, slot);
1404                                 if (wret)
1405                                         ret = wret;
1406                                 btrfs_block_release(root, leaf_buf);
1407                                 wret = btrfs_free_extent(trans, root, blocknr,
1408                                                          1, 1);
1409                                 if (wret)
1410                                         ret = wret;
1411                         } else {
1412                                 btrfs_mark_buffer_dirty(leaf_buf);
1413                                 btrfs_block_release(root, leaf_buf);
1414                         }
1415                 } else {
1416                         btrfs_mark_buffer_dirty(leaf_buf);
1417                 }
1418         }
1419         return ret;
1420 }
1421
1422 /*
1423  * walk up the tree as far as required to find the next leaf.
1424  * returns 0 if it found something or 1 if there are no greater leaves.
1425  * returns < 0 on io errors.
1426  */
1427 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1428 {
1429         int slot;
1430         int level = 1;
1431         u64 blocknr;
1432         struct buffer_head *c;
1433         struct btrfs_node *c_node;
1434         struct buffer_head *next = NULL;
1435
1436         while(level < BTRFS_MAX_LEVEL) {
1437                 if (!path->nodes[level])
1438                         return 1;
1439                 slot = path->slots[level] + 1;
1440                 c = path->nodes[level];
1441                 c_node = btrfs_buffer_node(c);
1442                 if (slot >= btrfs_header_nritems(&c_node->header)) {
1443                         level++;
1444                         continue;
1445                 }
1446                 blocknr = btrfs_node_blockptr(c_node, slot);
1447                 if (next)
1448                         btrfs_block_release(root, next);
1449                 next = read_tree_block(root, blocknr);
1450                 break;
1451         }
1452         path->slots[level] = slot;
1453         while(1) {
1454                 level--;
1455                 c = path->nodes[level];
1456                 btrfs_block_release(root, c);
1457                 path->nodes[level] = next;
1458                 path->slots[level] = 0;
1459                 if (!level)
1460                         break;
1461                 next = read_tree_block(root,
1462                        btrfs_node_blockptr(btrfs_buffer_node(next), 0));
1463         }
1464         return 0;
1465 }