nilfs2: Combine nilfs_btree_alloc_path() and nilfs_btree_init_path()
[safe/jmp/linux-2.6] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 /**
35  * struct nilfs_btree_path - A path on which B-tree operations are executed
36  * @bp_bh: buffer head of node block
37  * @bp_sib_bh: buffer head of sibling node block
38  * @bp_index: index of child node
39  * @bp_oldreq: ptr end request for old ptr
40  * @bp_newreq: ptr alloc request for new ptr
41  * @bp_op: rebalance operation
42  */
43 struct nilfs_btree_path {
44         struct buffer_head *bp_bh;
45         struct buffer_head *bp_sib_bh;
46         int bp_index;
47         union nilfs_bmap_ptr_req bp_oldreq;
48         union nilfs_bmap_ptr_req bp_newreq;
49         struct nilfs_btnode_chkey_ctxt bp_ctxt;
50         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51                       int, __u64 *, __u64 *);
52 };
53
54 /*
55  * B-tree path operations
56  */
57
58 static struct kmem_cache *nilfs_btree_path_cache;
59
60 int __init nilfs_btree_path_cache_init(void)
61 {
62         nilfs_btree_path_cache =
63                 kmem_cache_create("nilfs2_btree_path_cache",
64                                   sizeof(struct nilfs_btree_path) *
65                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67 }
68
69 void nilfs_btree_path_cache_destroy(void)
70 {
71         kmem_cache_destroy(nilfs_btree_path_cache);
72 }
73
74 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75 {
76         struct nilfs_btree_path *path;
77         int level = NILFS_BTREE_LEVEL_DATA;
78
79         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
80         if (path == NULL)
81                 goto out;
82
83         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
84                 path[level].bp_bh = NULL;
85                 path[level].bp_sib_bh = NULL;
86                 path[level].bp_index = 0;
87                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
88                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
89                 path[level].bp_op = NULL;
90         }
91
92 out:
93         return path;
94 }
95
96 static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
97 {
98         kmem_cache_free(nilfs_btree_path_cache, path);
99 }
100
101 static void nilfs_btree_release_path(struct nilfs_btree_path *path)
102 {
103         int level;
104
105         for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
106              level++)
107                 brelse(path[level].bp_bh);
108 }
109
110 /*
111  * B-tree node operations
112  */
113 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
114                                  struct buffer_head **bhp)
115 {
116         struct address_space *btnc =
117                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
118         int err;
119
120         err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
121         if (err)
122                 return err == -EEXIST ? 0 : err;
123
124         wait_on_buffer(*bhp);
125         if (!buffer_uptodate(*bhp)) {
126                 brelse(*bhp);
127                 return -EIO;
128         }
129         return 0;
130 }
131
132 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
133                                      __u64 ptr, struct buffer_head **bhp)
134 {
135         struct address_space *btnc =
136                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
137         struct buffer_head *bh;
138
139         bh = nilfs_btnode_create_block(btnc, ptr);
140         if (!bh)
141                 return -ENOMEM;
142
143         set_buffer_nilfs_volatile(bh);
144         *bhp = bh;
145         return 0;
146 }
147
148 static inline int
149 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
150 {
151         return node->bn_flags;
152 }
153
154 static inline void
155 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
156 {
157         node->bn_flags = flags;
158 }
159
160 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
161 {
162         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
163 }
164
165 static inline int
166 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
167 {
168         return node->bn_level;
169 }
170
171 static inline void
172 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
173 {
174         node->bn_level = level;
175 }
176
177 static inline int
178 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
179 {
180         return le16_to_cpu(node->bn_nchildren);
181 }
182
183 static inline void
184 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
185 {
186         node->bn_nchildren = cpu_to_le16(nchildren);
187 }
188
189 static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
190 {
191         return 1 << btree->bt_bmap.b_inode->i_blkbits;
192 }
193
194 static inline int
195 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
196                                const struct nilfs_btree *btree)
197 {
198         return nilfs_btree_node_root(node) ?
199                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
200                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
201 }
202
203 static inline int
204 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
205                                const struct nilfs_btree *btree)
206 {
207         return nilfs_btree_node_root(node) ?
208                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
209                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
210 }
211
212 static inline __le64 *
213 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
214 {
215         return (__le64 *)((char *)(node + 1) +
216                           (nilfs_btree_node_root(node) ?
217                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
218 }
219
220 static inline __le64 *
221 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
222                        const struct nilfs_btree *btree)
223 {
224         return (__le64 *)(nilfs_btree_node_dkeys(node) +
225                           nilfs_btree_node_nchildren_max(node, btree));
226 }
227
228 static inline __u64
229 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
230 {
231         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
232 }
233
234 static inline void
235 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
236 {
237         *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
238 }
239
240 static inline __u64
241 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
242                          const struct nilfs_btree_node *node, int index)
243 {
244         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
245                                         index));
246 }
247
248 static inline void
249 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
250                          struct nilfs_btree_node *node, int index, __u64 ptr)
251 {
252         *(nilfs_btree_node_dptrs(node, btree) + index) =
253                 nilfs_bmap_ptr_to_dptr(ptr);
254 }
255
256 static void nilfs_btree_node_init(struct nilfs_btree *btree,
257                                   struct nilfs_btree_node *node,
258                                   int flags, int level, int nchildren,
259                                   const __u64 *keys, const __u64 *ptrs)
260 {
261         __le64 *dkeys;
262         __le64 *dptrs;
263         int i;
264
265         nilfs_btree_node_set_flags(node, flags);
266         nilfs_btree_node_set_level(node, level);
267         nilfs_btree_node_set_nchildren(node, nchildren);
268
269         dkeys = nilfs_btree_node_dkeys(node);
270         dptrs = nilfs_btree_node_dptrs(node, btree);
271         for (i = 0; i < nchildren; i++) {
272                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
273                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
274         }
275 }
276
277 /* Assume the buffer heads corresponding to left and right are locked. */
278 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
279                                        struct nilfs_btree_node *left,
280                                        struct nilfs_btree_node *right,
281                                        int n)
282 {
283         __le64 *ldkeys, *rdkeys;
284         __le64 *ldptrs, *rdptrs;
285         int lnchildren, rnchildren;
286
287         ldkeys = nilfs_btree_node_dkeys(left);
288         ldptrs = nilfs_btree_node_dptrs(left, btree);
289         lnchildren = nilfs_btree_node_get_nchildren(left);
290
291         rdkeys = nilfs_btree_node_dkeys(right);
292         rdptrs = nilfs_btree_node_dptrs(right, btree);
293         rnchildren = nilfs_btree_node_get_nchildren(right);
294
295         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
296         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
297         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
298         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
299
300         lnchildren += n;
301         rnchildren -= n;
302         nilfs_btree_node_set_nchildren(left, lnchildren);
303         nilfs_btree_node_set_nchildren(right, rnchildren);
304 }
305
306 /* Assume that the buffer heads corresponding to left and right are locked. */
307 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
308                                         struct nilfs_btree_node *left,
309                                         struct nilfs_btree_node *right,
310                                         int n)
311 {
312         __le64 *ldkeys, *rdkeys;
313         __le64 *ldptrs, *rdptrs;
314         int lnchildren, rnchildren;
315
316         ldkeys = nilfs_btree_node_dkeys(left);
317         ldptrs = nilfs_btree_node_dptrs(left, btree);
318         lnchildren = nilfs_btree_node_get_nchildren(left);
319
320         rdkeys = nilfs_btree_node_dkeys(right);
321         rdptrs = nilfs_btree_node_dptrs(right, btree);
322         rnchildren = nilfs_btree_node_get_nchildren(right);
323
324         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
325         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
326         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
327         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
328
329         lnchildren -= n;
330         rnchildren += n;
331         nilfs_btree_node_set_nchildren(left, lnchildren);
332         nilfs_btree_node_set_nchildren(right, rnchildren);
333 }
334
335 /* Assume that the buffer head corresponding to node is locked. */
336 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
337                                     struct nilfs_btree_node *node,
338                                     __u64 key, __u64 ptr, int index)
339 {
340         __le64 *dkeys;
341         __le64 *dptrs;
342         int nchildren;
343
344         dkeys = nilfs_btree_node_dkeys(node);
345         dptrs = nilfs_btree_node_dptrs(node, btree);
346         nchildren = nilfs_btree_node_get_nchildren(node);
347         if (index < nchildren) {
348                 memmove(dkeys + index + 1, dkeys + index,
349                         (nchildren - index) * sizeof(*dkeys));
350                 memmove(dptrs + index + 1, dptrs + index,
351                         (nchildren - index) * sizeof(*dptrs));
352         }
353         dkeys[index] = nilfs_bmap_key_to_dkey(key);
354         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
355         nchildren++;
356         nilfs_btree_node_set_nchildren(node, nchildren);
357 }
358
359 /* Assume that the buffer head corresponding to node is locked. */
360 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
361                                     struct nilfs_btree_node *node,
362                                     __u64 *keyp, __u64 *ptrp, int index)
363 {
364         __u64 key;
365         __u64 ptr;
366         __le64 *dkeys;
367         __le64 *dptrs;
368         int nchildren;
369
370         dkeys = nilfs_btree_node_dkeys(node);
371         dptrs = nilfs_btree_node_dptrs(node, btree);
372         key = nilfs_bmap_dkey_to_key(dkeys[index]);
373         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
374         nchildren = nilfs_btree_node_get_nchildren(node);
375         if (keyp != NULL)
376                 *keyp = key;
377         if (ptrp != NULL)
378                 *ptrp = ptr;
379
380         if (index < nchildren - 1) {
381                 memmove(dkeys + index, dkeys + index + 1,
382                         (nchildren - index - 1) * sizeof(*dkeys));
383                 memmove(dptrs + index, dptrs + index + 1,
384                         (nchildren - index - 1) * sizeof(*dptrs));
385         }
386         nchildren--;
387         nilfs_btree_node_set_nchildren(node, nchildren);
388 }
389
390 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
391                                    __u64 key, int *indexp)
392 {
393         __u64 nkey;
394         int index, low, high, s;
395
396         /* binary search */
397         low = 0;
398         high = nilfs_btree_node_get_nchildren(node) - 1;
399         index = 0;
400         s = 0;
401         while (low <= high) {
402                 index = (low + high) / 2;
403                 nkey = nilfs_btree_node_get_key(node, index);
404                 if (nkey == key) {
405                         s = 0;
406                         goto out;
407                 } else if (nkey < key) {
408                         low = index + 1;
409                         s = -1;
410                 } else {
411                         high = index - 1;
412                         s = 1;
413                 }
414         }
415
416         /* adjust index */
417         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
418                 if (s > 0 && index > 0)
419                         index--;
420         } else if (s < 0)
421                 index++;
422
423  out:
424         *indexp = index;
425
426         return s == 0;
427 }
428
429 static inline struct nilfs_btree_node *
430 nilfs_btree_get_root(const struct nilfs_btree *btree)
431 {
432         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
433 }
434
435 static inline struct nilfs_btree_node *
436 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
437 {
438         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
439 }
440
441 static inline struct nilfs_btree_node *
442 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
443 {
444         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
445 }
446
447 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
448 {
449         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
450 }
451
452 static inline struct nilfs_btree_node *
453 nilfs_btree_get_node(const struct nilfs_btree *btree,
454                      const struct nilfs_btree_path *path,
455                      int level)
456 {
457         return (level == nilfs_btree_height(btree) - 1) ?
458                 nilfs_btree_get_root(btree) :
459                 nilfs_btree_get_nonroot_node(path, level);
460 }
461
462 static inline int
463 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
464 {
465         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
466                 dump_stack();
467                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
468                        nilfs_btree_node_get_level(node), level);
469                 return 1;
470         }
471         return 0;
472 }
473
474 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
475                                  struct nilfs_btree_path *path,
476                                  __u64 key, __u64 *ptrp, int minlevel)
477 {
478         struct nilfs_btree_node *node;
479         __u64 ptr;
480         int level, index, found, ret;
481
482         node = nilfs_btree_get_root(btree);
483         level = nilfs_btree_node_get_level(node);
484         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
485                 return -ENOENT;
486
487         found = nilfs_btree_node_lookup(node, key, &index);
488         ptr = nilfs_btree_node_get_ptr(btree, node, index);
489         path[level].bp_bh = NULL;
490         path[level].bp_index = index;
491
492         for (level--; level >= minlevel; level--) {
493                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
494                 if (ret < 0)
495                         return ret;
496                 node = nilfs_btree_get_nonroot_node(path, level);
497                 if (nilfs_btree_bad_node(node, level))
498                         return -EINVAL;
499                 if (!found)
500                         found = nilfs_btree_node_lookup(node, key, &index);
501                 else
502                         index = 0;
503                 if (index < nilfs_btree_node_nchildren_max(node, btree))
504                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
505                 else {
506                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
507                         /* insert */
508                         ptr = NILFS_BMAP_INVALID_PTR;
509                 }
510                 path[level].bp_index = index;
511         }
512         if (!found)
513                 return -ENOENT;
514
515         if (ptrp != NULL)
516                 *ptrp = ptr;
517
518         return 0;
519 }
520
521 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
522                                       struct nilfs_btree_path *path,
523                                       __u64 *keyp, __u64 *ptrp)
524 {
525         struct nilfs_btree_node *node;
526         __u64 ptr;
527         int index, level, ret;
528
529         node = nilfs_btree_get_root(btree);
530         index = nilfs_btree_node_get_nchildren(node) - 1;
531         if (index < 0)
532                 return -ENOENT;
533         level = nilfs_btree_node_get_level(node);
534         ptr = nilfs_btree_node_get_ptr(btree, node, index);
535         path[level].bp_bh = NULL;
536         path[level].bp_index = index;
537
538         for (level--; level > 0; level--) {
539                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
540                 if (ret < 0)
541                         return ret;
542                 node = nilfs_btree_get_nonroot_node(path, level);
543                 if (nilfs_btree_bad_node(node, level))
544                         return -EINVAL;
545                 index = nilfs_btree_node_get_nchildren(node) - 1;
546                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
547                 path[level].bp_index = index;
548         }
549
550         if (keyp != NULL)
551                 *keyp = nilfs_btree_node_get_key(node, index);
552         if (ptrp != NULL)
553                 *ptrp = ptr;
554
555         return 0;
556 }
557
558 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
559                               __u64 key, int level, __u64 *ptrp)
560 {
561         struct nilfs_btree *btree;
562         struct nilfs_btree_path *path;
563         __u64 ptr;
564         int ret;
565
566         btree = (struct nilfs_btree *)bmap;
567         path = nilfs_btree_alloc_path();
568         if (path == NULL)
569                 return -ENOMEM;
570
571         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
572
573         if (ptrp != NULL)
574                 *ptrp = ptr;
575
576         nilfs_btree_release_path(path);
577         nilfs_btree_free_path(path);
578
579         return ret;
580 }
581
582 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
583                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
584 {
585         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
586         struct nilfs_btree_path *path;
587         struct nilfs_btree_node *node;
588         struct inode *dat = NULL;
589         __u64 ptr, ptr2;
590         sector_t blocknr;
591         int level = NILFS_BTREE_LEVEL_NODE_MIN;
592         int ret, cnt, index, maxlevel;
593
594         path = nilfs_btree_alloc_path();
595         if (path == NULL)
596                 return -ENOMEM;
597
598         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
599         if (ret < 0)
600                 goto out;
601
602         if (NILFS_BMAP_USE_VBN(bmap)) {
603                 dat = nilfs_bmap_get_dat(bmap);
604                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
605                 if (ret < 0)
606                         goto out;
607                 ptr = blocknr;
608         }
609         cnt = 1;
610         if (cnt == maxblocks)
611                 goto end;
612
613         maxlevel = nilfs_btree_height(btree) - 1;
614         node = nilfs_btree_get_node(btree, path, level);
615         index = path[level].bp_index + 1;
616         for (;;) {
617                 while (index < nilfs_btree_node_get_nchildren(node)) {
618                         if (nilfs_btree_node_get_key(node, index) !=
619                             key + cnt)
620                                 goto end;
621                         ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
622                         if (dat) {
623                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
624                                 if (ret < 0)
625                                         goto out;
626                                 ptr2 = blocknr;
627                         }
628                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
629                                 goto end;
630                         index++;
631                         continue;
632                 }
633                 if (level == maxlevel)
634                         break;
635
636                 /* look-up right sibling node */
637                 node = nilfs_btree_get_node(btree, path, level + 1);
638                 index = path[level + 1].bp_index + 1;
639                 if (index >= nilfs_btree_node_get_nchildren(node) ||
640                     nilfs_btree_node_get_key(node, index) != key + cnt)
641                         break;
642                 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
643                 path[level + 1].bp_index = index;
644
645                 brelse(path[level].bp_bh);
646                 path[level].bp_bh = NULL;
647                 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
648                 if (ret < 0)
649                         goto out;
650                 node = nilfs_btree_get_nonroot_node(path, level);
651                 index = 0;
652                 path[level].bp_index = index;
653         }
654  end:
655         *ptrp = ptr;
656         ret = cnt;
657  out:
658         nilfs_btree_release_path(path);
659         nilfs_btree_free_path(path);
660         return ret;
661 }
662
663 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
664                                     struct nilfs_btree_path *path,
665                                     int level, __u64 key)
666 {
667         if (level < nilfs_btree_height(btree) - 1) {
668                 do {
669                         nilfs_btree_node_set_key(
670                                 nilfs_btree_get_nonroot_node(path, level),
671                                 path[level].bp_index, key);
672                         if (!buffer_dirty(path[level].bp_bh))
673                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
674                 } while ((path[level].bp_index == 0) &&
675                          (++level < nilfs_btree_height(btree) - 1));
676         }
677
678         /* root */
679         if (level == nilfs_btree_height(btree) - 1) {
680                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
681                                          path[level].bp_index, key);
682         }
683 }
684
685 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
686                                   struct nilfs_btree_path *path,
687                                   int level, __u64 *keyp, __u64 *ptrp)
688 {
689         struct nilfs_btree_node *node;
690
691         if (level < nilfs_btree_height(btree) - 1) {
692                 node = nilfs_btree_get_nonroot_node(path, level);
693                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
694                                         path[level].bp_index);
695                 if (!buffer_dirty(path[level].bp_bh))
696                         nilfs_btnode_mark_dirty(path[level].bp_bh);
697
698                 if (path[level].bp_index == 0)
699                         nilfs_btree_promote_key(btree, path, level + 1,
700                                                 nilfs_btree_node_get_key(node,
701                                                                          0));
702         } else {
703                 node = nilfs_btree_get_root(btree);
704                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
705                                         path[level].bp_index);
706         }
707 }
708
709 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
710                                    struct nilfs_btree_path *path,
711                                    int level, __u64 *keyp, __u64 *ptrp)
712 {
713         struct nilfs_btree_node *node, *left;
714         int nchildren, lnchildren, n, move;
715
716         node = nilfs_btree_get_nonroot_node(path, level);
717         left = nilfs_btree_get_sib_node(path, level);
718         nchildren = nilfs_btree_node_get_nchildren(node);
719         lnchildren = nilfs_btree_node_get_nchildren(left);
720         move = 0;
721
722         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
723         if (n > path[level].bp_index) {
724                 /* move insert point */
725                 n--;
726                 move = 1;
727         }
728
729         nilfs_btree_node_move_left(btree, left, node, n);
730
731         if (!buffer_dirty(path[level].bp_bh))
732                 nilfs_btnode_mark_dirty(path[level].bp_bh);
733         if (!buffer_dirty(path[level].bp_sib_bh))
734                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
735
736         nilfs_btree_promote_key(btree, path, level + 1,
737                                 nilfs_btree_node_get_key(node, 0));
738
739         if (move) {
740                 brelse(path[level].bp_bh);
741                 path[level].bp_bh = path[level].bp_sib_bh;
742                 path[level].bp_sib_bh = NULL;
743                 path[level].bp_index += lnchildren;
744                 path[level + 1].bp_index--;
745         } else {
746                 brelse(path[level].bp_sib_bh);
747                 path[level].bp_sib_bh = NULL;
748                 path[level].bp_index -= n;
749         }
750
751         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
752 }
753
754 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
755                                     struct nilfs_btree_path *path,
756                                     int level, __u64 *keyp, __u64 *ptrp)
757 {
758         struct nilfs_btree_node *node, *right;
759         int nchildren, rnchildren, n, move;
760
761         node = nilfs_btree_get_nonroot_node(path, level);
762         right = nilfs_btree_get_sib_node(path, level);
763         nchildren = nilfs_btree_node_get_nchildren(node);
764         rnchildren = nilfs_btree_node_get_nchildren(right);
765         move = 0;
766
767         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
768         if (n > nchildren - path[level].bp_index) {
769                 /* move insert point */
770                 n--;
771                 move = 1;
772         }
773
774         nilfs_btree_node_move_right(btree, node, right, n);
775
776         if (!buffer_dirty(path[level].bp_bh))
777                 nilfs_btnode_mark_dirty(path[level].bp_bh);
778         if (!buffer_dirty(path[level].bp_sib_bh))
779                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
780
781         path[level + 1].bp_index++;
782         nilfs_btree_promote_key(btree, path, level + 1,
783                                 nilfs_btree_node_get_key(right, 0));
784         path[level + 1].bp_index--;
785
786         if (move) {
787                 brelse(path[level].bp_bh);
788                 path[level].bp_bh = path[level].bp_sib_bh;
789                 path[level].bp_sib_bh = NULL;
790                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
791                 path[level + 1].bp_index++;
792         } else {
793                 brelse(path[level].bp_sib_bh);
794                 path[level].bp_sib_bh = NULL;
795         }
796
797         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
798 }
799
800 static void nilfs_btree_split(struct nilfs_btree *btree,
801                               struct nilfs_btree_path *path,
802                               int level, __u64 *keyp, __u64 *ptrp)
803 {
804         struct nilfs_btree_node *node, *right;
805         __u64 newkey;
806         __u64 newptr;
807         int nchildren, n, move;
808
809         node = nilfs_btree_get_nonroot_node(path, level);
810         right = nilfs_btree_get_sib_node(path, level);
811         nchildren = nilfs_btree_node_get_nchildren(node);
812         move = 0;
813
814         n = (nchildren + 1) / 2;
815         if (n > nchildren - path[level].bp_index) {
816                 n--;
817                 move = 1;
818         }
819
820         nilfs_btree_node_move_right(btree, node, right, n);
821
822         if (!buffer_dirty(path[level].bp_bh))
823                 nilfs_btnode_mark_dirty(path[level].bp_bh);
824         if (!buffer_dirty(path[level].bp_sib_bh))
825                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
826
827         newkey = nilfs_btree_node_get_key(right, 0);
828         newptr = path[level].bp_newreq.bpr_ptr;
829
830         if (move) {
831                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
832                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
833                                         path[level].bp_index);
834
835                 *keyp = nilfs_btree_node_get_key(right, 0);
836                 *ptrp = path[level].bp_newreq.bpr_ptr;
837
838                 brelse(path[level].bp_bh);
839                 path[level].bp_bh = path[level].bp_sib_bh;
840                 path[level].bp_sib_bh = NULL;
841         } else {
842                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
843
844                 *keyp = nilfs_btree_node_get_key(right, 0);
845                 *ptrp = path[level].bp_newreq.bpr_ptr;
846
847                 brelse(path[level].bp_sib_bh);
848                 path[level].bp_sib_bh = NULL;
849         }
850
851         path[level + 1].bp_index++;
852 }
853
854 static void nilfs_btree_grow(struct nilfs_btree *btree,
855                              struct nilfs_btree_path *path,
856                              int level, __u64 *keyp, __u64 *ptrp)
857 {
858         struct nilfs_btree_node *root, *child;
859         int n;
860
861         root = nilfs_btree_get_root(btree);
862         child = nilfs_btree_get_sib_node(path, level);
863
864         n = nilfs_btree_node_get_nchildren(root);
865
866         nilfs_btree_node_move_right(btree, root, child, n);
867         nilfs_btree_node_set_level(root, level + 1);
868
869         if (!buffer_dirty(path[level].bp_sib_bh))
870                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
871
872         path[level].bp_bh = path[level].bp_sib_bh;
873         path[level].bp_sib_bh = NULL;
874
875         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
876
877         *keyp = nilfs_btree_node_get_key(child, 0);
878         *ptrp = path[level].bp_newreq.bpr_ptr;
879 }
880
881 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
882                                    const struct nilfs_btree_path *path)
883 {
884         struct nilfs_btree_node *node;
885         int level;
886
887         if (path == NULL)
888                 return NILFS_BMAP_INVALID_PTR;
889
890         /* left sibling */
891         level = NILFS_BTREE_LEVEL_NODE_MIN;
892         if (path[level].bp_index > 0) {
893                 node = nilfs_btree_get_node(btree, path, level);
894                 return nilfs_btree_node_get_ptr(btree, node,
895                                                 path[level].bp_index - 1);
896         }
897
898         /* parent */
899         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
900         if (level <= nilfs_btree_height(btree) - 1) {
901                 node = nilfs_btree_get_node(btree, path, level);
902                 return nilfs_btree_node_get_ptr(btree, node,
903                                                 path[level].bp_index);
904         }
905
906         return NILFS_BMAP_INVALID_PTR;
907 }
908
909 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
910                                        const struct nilfs_btree_path *path,
911                                        __u64 key)
912 {
913         __u64 ptr;
914
915         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
916         if (ptr != NILFS_BMAP_INVALID_PTR)
917                 /* sequential access */
918                 return ptr;
919         else {
920                 ptr = nilfs_btree_find_near(btree, path);
921                 if (ptr != NILFS_BMAP_INVALID_PTR)
922                         /* near */
923                         return ptr;
924         }
925         /* block group */
926         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
927 }
928
929 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
930                                      __u64 ptr)
931 {
932         btree->bt_bmap.b_last_allocated_key = key;
933         btree->bt_bmap.b_last_allocated_ptr = ptr;
934 }
935
936 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
937                                       struct nilfs_btree_path *path,
938                                       int *levelp, __u64 key, __u64 ptr,
939                                       struct nilfs_bmap_stats *stats)
940 {
941         struct buffer_head *bh;
942         struct nilfs_btree_node *node, *parent, *sib;
943         __u64 sibptr;
944         int pindex, level, ret;
945         struct inode *dat = NULL;
946
947         stats->bs_nblocks = 0;
948         level = NILFS_BTREE_LEVEL_DATA;
949
950         /* allocate a new ptr for data block */
951         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
952                 path[level].bp_newreq.bpr_ptr =
953                         nilfs_btree_find_target_v(btree, path, key);
954                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
955         }
956
957         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
958                                            &path[level].bp_newreq, dat);
959         if (ret < 0)
960                 goto err_out_data;
961
962         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
963              level < nilfs_btree_height(btree) - 1;
964              level++) {
965                 node = nilfs_btree_get_nonroot_node(path, level);
966                 if (nilfs_btree_node_get_nchildren(node) <
967                     nilfs_btree_node_nchildren_max(node, btree)) {
968                         path[level].bp_op = nilfs_btree_do_insert;
969                         stats->bs_nblocks++;
970                         goto out;
971                 }
972
973                 parent = nilfs_btree_get_node(btree, path, level + 1);
974                 pindex = path[level + 1].bp_index;
975
976                 /* left sibling */
977                 if (pindex > 0) {
978                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
979                                                           pindex - 1);
980                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
981                         if (ret < 0)
982                                 goto err_out_child_node;
983                         sib = (struct nilfs_btree_node *)bh->b_data;
984                         if (nilfs_btree_node_get_nchildren(sib) <
985                             nilfs_btree_node_nchildren_max(sib, btree)) {
986                                 path[level].bp_sib_bh = bh;
987                                 path[level].bp_op = nilfs_btree_carry_left;
988                                 stats->bs_nblocks++;
989                                 goto out;
990                         } else
991                                 brelse(bh);
992                 }
993
994                 /* right sibling */
995                 if (pindex <
996                     nilfs_btree_node_get_nchildren(parent) - 1) {
997                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
998                                                           pindex + 1);
999                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1000                         if (ret < 0)
1001                                 goto err_out_child_node;
1002                         sib = (struct nilfs_btree_node *)bh->b_data;
1003                         if (nilfs_btree_node_get_nchildren(sib) <
1004                             nilfs_btree_node_nchildren_max(sib, btree)) {
1005                                 path[level].bp_sib_bh = bh;
1006                                 path[level].bp_op = nilfs_btree_carry_right;
1007                                 stats->bs_nblocks++;
1008                                 goto out;
1009                         } else
1010                                 brelse(bh);
1011                 }
1012
1013                 /* split */
1014                 path[level].bp_newreq.bpr_ptr =
1015                         path[level - 1].bp_newreq.bpr_ptr + 1;
1016                 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1017                                                    &path[level].bp_newreq, dat);
1018                 if (ret < 0)
1019                         goto err_out_child_node;
1020                 ret = nilfs_btree_get_new_block(btree,
1021                                                 path[level].bp_newreq.bpr_ptr,
1022                                                 &bh);
1023                 if (ret < 0)
1024                         goto err_out_curr_node;
1025
1026                 stats->bs_nblocks++;
1027
1028                 nilfs_btree_node_init(btree,
1029                                       (struct nilfs_btree_node *)bh->b_data,
1030                                       0, level, 0, NULL, NULL);
1031                 path[level].bp_sib_bh = bh;
1032                 path[level].bp_op = nilfs_btree_split;
1033         }
1034
1035         /* root */
1036         node = nilfs_btree_get_root(btree);
1037         if (nilfs_btree_node_get_nchildren(node) <
1038             nilfs_btree_node_nchildren_max(node, btree)) {
1039                 path[level].bp_op = nilfs_btree_do_insert;
1040                 stats->bs_nblocks++;
1041                 goto out;
1042         }
1043
1044         /* grow */
1045         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1046         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1047                                            &path[level].bp_newreq, dat);
1048         if (ret < 0)
1049                 goto err_out_child_node;
1050         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1051                                         &bh);
1052         if (ret < 0)
1053                 goto err_out_curr_node;
1054
1055         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1056                               0, level, 0, NULL, NULL);
1057         path[level].bp_sib_bh = bh;
1058         path[level].bp_op = nilfs_btree_grow;
1059
1060         level++;
1061         path[level].bp_op = nilfs_btree_do_insert;
1062
1063         /* a newly-created node block and a data block are added */
1064         stats->bs_nblocks += 2;
1065
1066         /* success */
1067  out:
1068         *levelp = level;
1069         return ret;
1070
1071         /* error */
1072  err_out_curr_node:
1073         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1074                                    dat);
1075  err_out_child_node:
1076         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1077                 nilfs_btnode_delete(path[level].bp_sib_bh);
1078                 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1079                                            &path[level].bp_newreq, dat);
1080
1081         }
1082
1083         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1084                                    dat);
1085  err_out_data:
1086         *levelp = level;
1087         stats->bs_nblocks = 0;
1088         return ret;
1089 }
1090
1091 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1092                                       struct nilfs_btree_path *path,
1093                                       int maxlevel, __u64 key, __u64 ptr)
1094 {
1095         struct inode *dat = NULL;
1096         int level;
1097
1098         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1099         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1100         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1101                 nilfs_btree_set_target_v(btree, key, ptr);
1102                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1103         }
1104
1105         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1106                 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1107                                             &path[level - 1].bp_newreq, dat);
1108                 path[level].bp_op(btree, path, level, &key, &ptr);
1109         }
1110
1111         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1112                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1113 }
1114
1115 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1116 {
1117         struct nilfs_btree *btree;
1118         struct nilfs_btree_path *path;
1119         struct nilfs_bmap_stats stats;
1120         int level, ret;
1121
1122         btree = (struct nilfs_btree *)bmap;
1123         path = nilfs_btree_alloc_path();
1124         if (path == NULL)
1125                 return -ENOMEM;
1126
1127         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1128                                     NILFS_BTREE_LEVEL_NODE_MIN);
1129         if (ret != -ENOENT) {
1130                 if (ret == 0)
1131                         ret = -EEXIST;
1132                 goto out;
1133         }
1134
1135         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1136         if (ret < 0)
1137                 goto out;
1138         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1139         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1140
1141  out:
1142         nilfs_btree_release_path(path);
1143         nilfs_btree_free_path(path);
1144         return ret;
1145 }
1146
1147 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1148                                   struct nilfs_btree_path *path,
1149                                   int level, __u64 *keyp, __u64 *ptrp)
1150 {
1151         struct nilfs_btree_node *node;
1152
1153         if (level < nilfs_btree_height(btree) - 1) {
1154                 node = nilfs_btree_get_nonroot_node(path, level);
1155                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1156                                         path[level].bp_index);
1157                 if (!buffer_dirty(path[level].bp_bh))
1158                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1159                 if (path[level].bp_index == 0)
1160                         nilfs_btree_promote_key(btree, path, level + 1,
1161                                 nilfs_btree_node_get_key(node, 0));
1162         } else {
1163                 node = nilfs_btree_get_root(btree);
1164                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1165                                         path[level].bp_index);
1166         }
1167 }
1168
1169 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1170                                     struct nilfs_btree_path *path,
1171                                     int level, __u64 *keyp, __u64 *ptrp)
1172 {
1173         struct nilfs_btree_node *node, *left;
1174         int nchildren, lnchildren, n;
1175
1176         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1177
1178         node = nilfs_btree_get_nonroot_node(path, level);
1179         left = nilfs_btree_get_sib_node(path, level);
1180         nchildren = nilfs_btree_node_get_nchildren(node);
1181         lnchildren = nilfs_btree_node_get_nchildren(left);
1182
1183         n = (nchildren + lnchildren) / 2 - nchildren;
1184
1185         nilfs_btree_node_move_right(btree, left, node, n);
1186
1187         if (!buffer_dirty(path[level].bp_bh))
1188                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1189         if (!buffer_dirty(path[level].bp_sib_bh))
1190                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1191
1192         nilfs_btree_promote_key(btree, path, level + 1,
1193                                 nilfs_btree_node_get_key(node, 0));
1194
1195         brelse(path[level].bp_sib_bh);
1196         path[level].bp_sib_bh = NULL;
1197         path[level].bp_index += n;
1198 }
1199
1200 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1201                                      struct nilfs_btree_path *path,
1202                                      int level, __u64 *keyp, __u64 *ptrp)
1203 {
1204         struct nilfs_btree_node *node, *right;
1205         int nchildren, rnchildren, n;
1206
1207         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1208
1209         node = nilfs_btree_get_nonroot_node(path, level);
1210         right = nilfs_btree_get_sib_node(path, level);
1211         nchildren = nilfs_btree_node_get_nchildren(node);
1212         rnchildren = nilfs_btree_node_get_nchildren(right);
1213
1214         n = (nchildren + rnchildren) / 2 - nchildren;
1215
1216         nilfs_btree_node_move_left(btree, node, right, n);
1217
1218         if (!buffer_dirty(path[level].bp_bh))
1219                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1220         if (!buffer_dirty(path[level].bp_sib_bh))
1221                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1222
1223         path[level + 1].bp_index++;
1224         nilfs_btree_promote_key(btree, path, level + 1,
1225                                 nilfs_btree_node_get_key(right, 0));
1226         path[level + 1].bp_index--;
1227
1228         brelse(path[level].bp_sib_bh);
1229         path[level].bp_sib_bh = NULL;
1230 }
1231
1232 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1233                                     struct nilfs_btree_path *path,
1234                                     int level, __u64 *keyp, __u64 *ptrp)
1235 {
1236         struct nilfs_btree_node *node, *left;
1237         int n;
1238
1239         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1240
1241         node = nilfs_btree_get_nonroot_node(path, level);
1242         left = nilfs_btree_get_sib_node(path, level);
1243
1244         n = nilfs_btree_node_get_nchildren(node);
1245
1246         nilfs_btree_node_move_left(btree, left, node, n);
1247
1248         if (!buffer_dirty(path[level].bp_sib_bh))
1249                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1250
1251         nilfs_btnode_delete(path[level].bp_bh);
1252         path[level].bp_bh = path[level].bp_sib_bh;
1253         path[level].bp_sib_bh = NULL;
1254         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1255 }
1256
1257 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1258                                      struct nilfs_btree_path *path,
1259                                      int level, __u64 *keyp, __u64 *ptrp)
1260 {
1261         struct nilfs_btree_node *node, *right;
1262         int n;
1263
1264         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1265
1266         node = nilfs_btree_get_nonroot_node(path, level);
1267         right = nilfs_btree_get_sib_node(path, level);
1268
1269         n = nilfs_btree_node_get_nchildren(right);
1270
1271         nilfs_btree_node_move_left(btree, node, right, n);
1272
1273         if (!buffer_dirty(path[level].bp_bh))
1274                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1275
1276         nilfs_btnode_delete(path[level].bp_sib_bh);
1277         path[level].bp_sib_bh = NULL;
1278         path[level + 1].bp_index++;
1279 }
1280
1281 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1282                                struct nilfs_btree_path *path,
1283                                int level, __u64 *keyp, __u64 *ptrp)
1284 {
1285         struct nilfs_btree_node *root, *child;
1286         int n;
1287
1288         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1289
1290         root = nilfs_btree_get_root(btree);
1291         child = nilfs_btree_get_nonroot_node(path, level);
1292
1293         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1294         nilfs_btree_node_set_level(root, level);
1295         n = nilfs_btree_node_get_nchildren(child);
1296         nilfs_btree_node_move_left(btree, root, child, n);
1297
1298         nilfs_btnode_delete(path[level].bp_bh);
1299         path[level].bp_bh = NULL;
1300 }
1301
1302
1303 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1304                                       struct nilfs_btree_path *path,
1305                                       int *levelp,
1306                                       struct nilfs_bmap_stats *stats,
1307                                       struct inode *dat)
1308 {
1309         struct buffer_head *bh;
1310         struct nilfs_btree_node *node, *parent, *sib;
1311         __u64 sibptr;
1312         int pindex, level, ret;
1313
1314         ret = 0;
1315         stats->bs_nblocks = 0;
1316         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1317              level < nilfs_btree_height(btree) - 1;
1318              level++) {
1319                 node = nilfs_btree_get_nonroot_node(path, level);
1320                 path[level].bp_oldreq.bpr_ptr =
1321                         nilfs_btree_node_get_ptr(btree, node,
1322                                                  path[level].bp_index);
1323                 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1324                                                  &path[level].bp_oldreq, dat);
1325                 if (ret < 0)
1326                         goto err_out_child_node;
1327
1328                 if (nilfs_btree_node_get_nchildren(node) >
1329                     nilfs_btree_node_nchildren_min(node, btree)) {
1330                         path[level].bp_op = nilfs_btree_do_delete;
1331                         stats->bs_nblocks++;
1332                         goto out;
1333                 }
1334
1335                 parent = nilfs_btree_get_node(btree, path, level + 1);
1336                 pindex = path[level + 1].bp_index;
1337
1338                 if (pindex > 0) {
1339                         /* left sibling */
1340                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1341                                                           pindex - 1);
1342                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1343                         if (ret < 0)
1344                                 goto err_out_curr_node;
1345                         sib = (struct nilfs_btree_node *)bh->b_data;
1346                         if (nilfs_btree_node_get_nchildren(sib) >
1347                             nilfs_btree_node_nchildren_min(sib, btree)) {
1348                                 path[level].bp_sib_bh = bh;
1349                                 path[level].bp_op = nilfs_btree_borrow_left;
1350                                 stats->bs_nblocks++;
1351                                 goto out;
1352                         } else {
1353                                 path[level].bp_sib_bh = bh;
1354                                 path[level].bp_op = nilfs_btree_concat_left;
1355                                 stats->bs_nblocks++;
1356                                 /* continue; */
1357                         }
1358                 } else if (pindex <
1359                            nilfs_btree_node_get_nchildren(parent) - 1) {
1360                         /* right sibling */
1361                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1362                                                           pindex + 1);
1363                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1364                         if (ret < 0)
1365                                 goto err_out_curr_node;
1366                         sib = (struct nilfs_btree_node *)bh->b_data;
1367                         if (nilfs_btree_node_get_nchildren(sib) >
1368                             nilfs_btree_node_nchildren_min(sib, btree)) {
1369                                 path[level].bp_sib_bh = bh;
1370                                 path[level].bp_op = nilfs_btree_borrow_right;
1371                                 stats->bs_nblocks++;
1372                                 goto out;
1373                         } else {
1374                                 path[level].bp_sib_bh = bh;
1375                                 path[level].bp_op = nilfs_btree_concat_right;
1376                                 stats->bs_nblocks++;
1377                                 /* continue; */
1378                         }
1379                 } else {
1380                         /* no siblings */
1381                         /* the only child of the root node */
1382                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1383                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1384                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1385                                 path[level].bp_op = nilfs_btree_shrink;
1386                                 stats->bs_nblocks += 2;
1387                         } else {
1388                                 path[level].bp_op = nilfs_btree_do_delete;
1389                                 stats->bs_nblocks++;
1390                         }
1391
1392                         goto out;
1393
1394                 }
1395         }
1396
1397         node = nilfs_btree_get_root(btree);
1398         path[level].bp_oldreq.bpr_ptr =
1399                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1400
1401         ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1402                                          &path[level].bp_oldreq, dat);
1403         if (ret < 0)
1404                 goto err_out_child_node;
1405
1406         /* child of the root node is deleted */
1407         path[level].bp_op = nilfs_btree_do_delete;
1408         stats->bs_nblocks++;
1409
1410         /* success */
1411  out:
1412         *levelp = level;
1413         return ret;
1414
1415         /* error */
1416  err_out_curr_node:
1417         nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1418  err_out_child_node:
1419         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1420                 brelse(path[level].bp_sib_bh);
1421                 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1422                                          &path[level].bp_oldreq, dat);
1423         }
1424         *levelp = level;
1425         stats->bs_nblocks = 0;
1426         return ret;
1427 }
1428
1429 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1430                                       struct nilfs_btree_path *path,
1431                                       int maxlevel, struct inode *dat)
1432 {
1433         int level;
1434
1435         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1436                 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1437                                           &path[level].bp_oldreq, dat);
1438                 path[level].bp_op(btree, path, level, NULL, NULL);
1439         }
1440
1441         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1442                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1443 }
1444
1445 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1446
1447 {
1448         struct nilfs_btree *btree;
1449         struct nilfs_btree_path *path;
1450         struct nilfs_bmap_stats stats;
1451         struct inode *dat;
1452         int level, ret;
1453
1454         btree = (struct nilfs_btree *)bmap;
1455         path = nilfs_btree_alloc_path();
1456         if (path == NULL)
1457                 return -ENOMEM;
1458
1459         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1460                                     NILFS_BTREE_LEVEL_NODE_MIN);
1461         if (ret < 0)
1462                 goto out;
1463
1464
1465         dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1466                 nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1467
1468         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1469         if (ret < 0)
1470                 goto out;
1471         nilfs_btree_commit_delete(btree, path, level, dat);
1472         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1473
1474 out:
1475         nilfs_btree_release_path(path);
1476         nilfs_btree_free_path(path);
1477         return ret;
1478 }
1479
1480 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1481 {
1482         struct nilfs_btree *btree;
1483         struct nilfs_btree_path *path;
1484         int ret;
1485
1486         btree = (struct nilfs_btree *)bmap;
1487         path = nilfs_btree_alloc_path();
1488         if (path == NULL)
1489                 return -ENOMEM;
1490
1491         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1492
1493         nilfs_btree_release_path(path);
1494         nilfs_btree_free_path(path);
1495
1496         return ret;
1497 }
1498
1499 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1500 {
1501         struct buffer_head *bh;
1502         struct nilfs_btree *btree;
1503         struct nilfs_btree_node *root, *node;
1504         __u64 maxkey, nextmaxkey;
1505         __u64 ptr;
1506         int nchildren, ret;
1507
1508         btree = (struct nilfs_btree *)bmap;
1509         root = nilfs_btree_get_root(btree);
1510         switch (nilfs_btree_height(btree)) {
1511         case 2:
1512                 bh = NULL;
1513                 node = root;
1514                 break;
1515         case 3:
1516                 nchildren = nilfs_btree_node_get_nchildren(root);
1517                 if (nchildren > 1)
1518                         return 0;
1519                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1520                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1521                 if (ret < 0)
1522                         return ret;
1523                 node = (struct nilfs_btree_node *)bh->b_data;
1524                 break;
1525         default:
1526                 return 0;
1527         }
1528
1529         nchildren = nilfs_btree_node_get_nchildren(node);
1530         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1531         nextmaxkey = (nchildren > 1) ?
1532                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1533         if (bh != NULL)
1534                 brelse(bh);
1535
1536         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1537 }
1538
1539 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1540                                    __u64 *keys, __u64 *ptrs, int nitems)
1541 {
1542         struct buffer_head *bh;
1543         struct nilfs_btree *btree;
1544         struct nilfs_btree_node *node, *root;
1545         __le64 *dkeys;
1546         __le64 *dptrs;
1547         __u64 ptr;
1548         int nchildren, i, ret;
1549
1550         btree = (struct nilfs_btree *)bmap;
1551         root = nilfs_btree_get_root(btree);
1552         switch (nilfs_btree_height(btree)) {
1553         case 2:
1554                 bh = NULL;
1555                 node = root;
1556                 break;
1557         case 3:
1558                 nchildren = nilfs_btree_node_get_nchildren(root);
1559                 WARN_ON(nchildren > 1);
1560                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1561                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1562                 if (ret < 0)
1563                         return ret;
1564                 node = (struct nilfs_btree_node *)bh->b_data;
1565                 break;
1566         default:
1567                 node = NULL;
1568                 return -EINVAL;
1569         }
1570
1571         nchildren = nilfs_btree_node_get_nchildren(node);
1572         if (nchildren < nitems)
1573                 nitems = nchildren;
1574         dkeys = nilfs_btree_node_dkeys(node);
1575         dptrs = nilfs_btree_node_dptrs(node, btree);
1576         for (i = 0; i < nitems; i++) {
1577                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1578                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1579         }
1580
1581         if (bh != NULL)
1582                 brelse(bh);
1583
1584         return nitems;
1585 }
1586
1587 static int
1588 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1589                                        union nilfs_bmap_ptr_req *dreq,
1590                                        union nilfs_bmap_ptr_req *nreq,
1591                                        struct buffer_head **bhp,
1592                                        struct nilfs_bmap_stats *stats)
1593 {
1594         struct buffer_head *bh;
1595         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1596         struct inode *dat = NULL;
1597         int ret;
1598
1599         stats->bs_nblocks = 0;
1600
1601         /* for data */
1602         /* cannot find near ptr */
1603         if (NILFS_BMAP_USE_VBN(bmap)) {
1604                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1605                 dat = nilfs_bmap_get_dat(bmap);
1606         }
1607
1608         ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1609         if (ret < 0)
1610                 return ret;
1611
1612         *bhp = NULL;
1613         stats->bs_nblocks++;
1614         if (nreq != NULL) {
1615                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1616                 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1617                 if (ret < 0)
1618                         goto err_out_dreq;
1619
1620                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1621                 if (ret < 0)
1622                         goto err_out_nreq;
1623
1624                 *bhp = bh;
1625                 stats->bs_nblocks++;
1626         }
1627
1628         /* success */
1629         return 0;
1630
1631         /* error */
1632  err_out_nreq:
1633         nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1634  err_out_dreq:
1635         nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1636         stats->bs_nblocks = 0;
1637         return ret;
1638
1639 }
1640
1641 static void
1642 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1643                                       __u64 key, __u64 ptr,
1644                                       const __u64 *keys, const __u64 *ptrs,
1645                                       int n,
1646                                       union nilfs_bmap_ptr_req *dreq,
1647                                       union nilfs_bmap_ptr_req *nreq,
1648                                       struct buffer_head *bh)
1649 {
1650         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1651         struct nilfs_btree_node *node;
1652         struct inode *dat;
1653         __u64 tmpptr;
1654
1655         /* free resources */
1656         if (bmap->b_ops->bop_clear != NULL)
1657                 bmap->b_ops->bop_clear(bmap);
1658
1659         /* ptr must be a pointer to a buffer head. */
1660         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1661
1662         /* convert and insert */
1663         dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1664         nilfs_btree_init(bmap);
1665         if (nreq != NULL) {
1666                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1667                 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1668
1669                 /* create child node at level 1 */
1670                 node = (struct nilfs_btree_node *)bh->b_data;
1671                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1672                 nilfs_btree_node_insert(btree, node,
1673                                         key, dreq->bpr_ptr, n);
1674                 if (!buffer_dirty(bh))
1675                         nilfs_btnode_mark_dirty(bh);
1676                 if (!nilfs_bmap_dirty(bmap))
1677                         nilfs_bmap_set_dirty(bmap);
1678
1679                 brelse(bh);
1680
1681                 /* create root node at level 2 */
1682                 node = nilfs_btree_get_root(btree);
1683                 tmpptr = nreq->bpr_ptr;
1684                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1685                                       2, 1, &keys[0], &tmpptr);
1686         } else {
1687                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1688
1689                 /* create root node at level 1 */
1690                 node = nilfs_btree_get_root(btree);
1691                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1692                                       1, n, keys, ptrs);
1693                 nilfs_btree_node_insert(btree, node,
1694                                         key, dreq->bpr_ptr, n);
1695                 if (!nilfs_bmap_dirty(bmap))
1696                         nilfs_bmap_set_dirty(bmap);
1697         }
1698
1699         if (NILFS_BMAP_USE_VBN(bmap))
1700                 nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1701 }
1702
1703 /**
1704  * nilfs_btree_convert_and_insert -
1705  * @bmap:
1706  * @key:
1707  * @ptr:
1708  * @keys:
1709  * @ptrs:
1710  * @n:
1711  */
1712 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1713                                    __u64 key, __u64 ptr,
1714                                    const __u64 *keys, const __u64 *ptrs, int n)
1715 {
1716         struct buffer_head *bh;
1717         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1718         struct nilfs_bmap_stats stats;
1719         int ret;
1720
1721         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1722                 di = &dreq;
1723                 ni = NULL;
1724         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1725                            1 << bmap->b_inode->i_blkbits)) {
1726                 di = &dreq;
1727                 ni = &nreq;
1728         } else {
1729                 di = NULL;
1730                 ni = NULL;
1731                 BUG();
1732         }
1733
1734         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1735                                                      &stats);
1736         if (ret < 0)
1737                 return ret;
1738         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1739                                               di, ni, bh);
1740         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1741         return 0;
1742 }
1743
1744 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1745                                    struct nilfs_btree_path *path,
1746                                    int level,
1747                                    struct buffer_head *bh)
1748 {
1749         while ((++level < nilfs_btree_height(btree) - 1) &&
1750                !buffer_dirty(path[level].bp_bh))
1751                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1752
1753         return 0;
1754 }
1755
1756 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1757                                         struct nilfs_btree_path *path,
1758                                         int level, struct inode *dat)
1759 {
1760         struct nilfs_btree_node *parent;
1761         int ret;
1762
1763         parent = nilfs_btree_get_node(btree, path, level + 1);
1764         path[level].bp_oldreq.bpr_ptr =
1765                 nilfs_btree_node_get_ptr(btree, parent,
1766                                          path[level + 1].bp_index);
1767         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1768         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1769                                        &path[level].bp_newreq.bpr_req);
1770         if (ret < 0)
1771                 return ret;
1772
1773         if (buffer_nilfs_node(path[level].bp_bh)) {
1774                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1775                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1776                 path[level].bp_ctxt.bh = path[level].bp_bh;
1777                 ret = nilfs_btnode_prepare_change_key(
1778                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1779                         &path[level].bp_ctxt);
1780                 if (ret < 0) {
1781                         nilfs_dat_abort_update(dat,
1782                                                &path[level].bp_oldreq.bpr_req,
1783                                                &path[level].bp_newreq.bpr_req);
1784                         return ret;
1785                 }
1786         }
1787
1788         return 0;
1789 }
1790
1791 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1792                                         struct nilfs_btree_path *path,
1793                                         int level, struct inode *dat)
1794 {
1795         struct nilfs_btree_node *parent;
1796
1797         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1798                                 &path[level].bp_newreq.bpr_req,
1799                                 btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1800
1801         if (buffer_nilfs_node(path[level].bp_bh)) {
1802                 nilfs_btnode_commit_change_key(
1803                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1804                         &path[level].bp_ctxt);
1805                 path[level].bp_bh = path[level].bp_ctxt.bh;
1806         }
1807         set_buffer_nilfs_volatile(path[level].bp_bh);
1808
1809         parent = nilfs_btree_get_node(btree, path, level + 1);
1810         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1811                                  path[level].bp_newreq.bpr_ptr);
1812 }
1813
1814 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1815                                        struct nilfs_btree_path *path,
1816                                        int level, struct inode *dat)
1817 {
1818         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1819                                &path[level].bp_newreq.bpr_req);
1820         if (buffer_nilfs_node(path[level].bp_bh))
1821                 nilfs_btnode_abort_change_key(
1822                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1823                         &path[level].bp_ctxt);
1824 }
1825
1826 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1827                                            struct nilfs_btree_path *path,
1828                                            int minlevel, int *maxlevelp,
1829                                            struct inode *dat)
1830 {
1831         int level, ret;
1832
1833         level = minlevel;
1834         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1835                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1836                 if (ret < 0)
1837                         return ret;
1838         }
1839         while ((++level < nilfs_btree_height(btree) - 1) &&
1840                !buffer_dirty(path[level].bp_bh)) {
1841
1842                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1843                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1844                 if (ret < 0)
1845                         goto out;
1846         }
1847
1848         /* success */
1849         *maxlevelp = level - 1;
1850         return 0;
1851
1852         /* error */
1853  out:
1854         while (--level > minlevel)
1855                 nilfs_btree_abort_update_v(btree, path, level, dat);
1856         if (!buffer_nilfs_volatile(path[level].bp_bh))
1857                 nilfs_btree_abort_update_v(btree, path, level, dat);
1858         return ret;
1859 }
1860
1861 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1862                                            struct nilfs_btree_path *path,
1863                                            int minlevel, int maxlevel,
1864                                            struct buffer_head *bh,
1865                                            struct inode *dat)
1866 {
1867         int level;
1868
1869         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1870                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1871
1872         for (level = minlevel + 1; level <= maxlevel; level++)
1873                 nilfs_btree_commit_update_v(btree, path, level, dat);
1874 }
1875
1876 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1877                                    struct nilfs_btree_path *path,
1878                                    int level, struct buffer_head *bh)
1879 {
1880         int maxlevel = 0, ret;
1881         struct nilfs_btree_node *parent;
1882         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1883         __u64 ptr;
1884
1885         get_bh(bh);
1886         path[level].bp_bh = bh;
1887         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1888                                               dat);
1889         if (ret < 0)
1890                 goto out;
1891
1892         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1893                 parent = nilfs_btree_get_node(btree, path, level + 1);
1894                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1895                                                path[level + 1].bp_index);
1896                 ret = nilfs_dat_mark_dirty(dat, ptr);
1897                 if (ret < 0)
1898                         goto out;
1899         }
1900
1901         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1902
1903  out:
1904         brelse(path[level].bp_bh);
1905         path[level].bp_bh = NULL;
1906         return ret;
1907 }
1908
1909 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1910                                  struct buffer_head *bh)
1911 {
1912         struct nilfs_btree *btree;
1913         struct nilfs_btree_path *path;
1914         struct nilfs_btree_node *node;
1915         __u64 key;
1916         int level, ret;
1917
1918         WARN_ON(!buffer_dirty(bh));
1919
1920         btree = (struct nilfs_btree *)bmap;
1921         path = nilfs_btree_alloc_path();
1922         if (path == NULL)
1923                 return -ENOMEM;
1924
1925         if (buffer_nilfs_node(bh)) {
1926                 node = (struct nilfs_btree_node *)bh->b_data;
1927                 key = nilfs_btree_node_get_key(node, 0);
1928                 level = nilfs_btree_node_get_level(node);
1929         } else {
1930                 key = nilfs_bmap_data_get_key(bmap, bh);
1931                 level = NILFS_BTREE_LEVEL_DATA;
1932         }
1933
1934         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1935         if (ret < 0) {
1936                 if (unlikely(ret == -ENOENT))
1937                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1938                                __func__, (unsigned long long)key, level);
1939                 goto out;
1940         }
1941
1942         ret = NILFS_BMAP_USE_VBN(bmap) ?
1943                 nilfs_btree_propagate_v(btree, path, level, bh) :
1944                 nilfs_btree_propagate_p(btree, path, level, bh);
1945
1946  out:
1947         nilfs_btree_release_path(path);
1948         nilfs_btree_free_path(path);
1949
1950         return ret;
1951 }
1952
1953 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1954                                     struct buffer_head *bh)
1955 {
1956         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
1957 }
1958
1959 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1960                                          struct list_head *lists,
1961                                          struct buffer_head *bh)
1962 {
1963         struct list_head *head;
1964         struct buffer_head *cbh;
1965         struct nilfs_btree_node *node, *cnode;
1966         __u64 key, ckey;
1967         int level;
1968
1969         get_bh(bh);
1970         node = (struct nilfs_btree_node *)bh->b_data;
1971         key = nilfs_btree_node_get_key(node, 0);
1972         level = nilfs_btree_node_get_level(node);
1973         list_for_each(head, &lists[level]) {
1974                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1975                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1976                 ckey = nilfs_btree_node_get_key(cnode, 0);
1977                 if (key < ckey)
1978                         break;
1979         }
1980         list_add_tail(&bh->b_assoc_buffers, head);
1981 }
1982
1983 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1984                                              struct list_head *listp)
1985 {
1986         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1987         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1988         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1989         struct pagevec pvec;
1990         struct buffer_head *bh, *head;
1991         pgoff_t index = 0;
1992         int level, i;
1993
1994         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1995              level < NILFS_BTREE_LEVEL_MAX;
1996              level++)
1997                 INIT_LIST_HEAD(&lists[level]);
1998
1999         pagevec_init(&pvec, 0);
2000
2001         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2002                                   PAGEVEC_SIZE)) {
2003                 for (i = 0; i < pagevec_count(&pvec); i++) {
2004                         bh = head = page_buffers(pvec.pages[i]);
2005                         do {
2006                                 if (buffer_dirty(bh))
2007                                         nilfs_btree_add_dirty_buffer(btree,
2008                                                                      lists, bh);
2009                         } while ((bh = bh->b_this_page) != head);
2010                 }
2011                 pagevec_release(&pvec);
2012                 cond_resched();
2013         }
2014
2015         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2016              level < NILFS_BTREE_LEVEL_MAX;
2017              level++)
2018                 list_splice_tail(&lists[level], listp);
2019 }
2020
2021 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2022                                 struct nilfs_btree_path *path,
2023                                 int level,
2024                                 struct buffer_head **bh,
2025                                 sector_t blocknr,
2026                                 union nilfs_binfo *binfo)
2027 {
2028         struct nilfs_btree_node *parent;
2029         __u64 key;
2030         __u64 ptr;
2031         int ret;
2032
2033         parent = nilfs_btree_get_node(btree, path, level + 1);
2034         ptr = nilfs_btree_node_get_ptr(btree, parent,
2035                                        path[level + 1].bp_index);
2036         if (buffer_nilfs_node(*bh)) {
2037                 path[level].bp_ctxt.oldkey = ptr;
2038                 path[level].bp_ctxt.newkey = blocknr;
2039                 path[level].bp_ctxt.bh = *bh;
2040                 ret = nilfs_btnode_prepare_change_key(
2041                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2042                         &path[level].bp_ctxt);
2043                 if (ret < 0)
2044                         return ret;
2045                 nilfs_btnode_commit_change_key(
2046                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2047                         &path[level].bp_ctxt);
2048                 *bh = path[level].bp_ctxt.bh;
2049         }
2050
2051         nilfs_btree_node_set_ptr(btree, parent,
2052                                  path[level + 1].bp_index, blocknr);
2053
2054         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2055         /* on-disk format */
2056         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2057         binfo->bi_dat.bi_level = level;
2058
2059         return 0;
2060 }
2061
2062 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2063                                 struct nilfs_btree_path *path,
2064                                 int level,
2065                                 struct buffer_head **bh,
2066                                 sector_t blocknr,
2067                                 union nilfs_binfo *binfo)
2068 {
2069         struct nilfs_btree_node *parent;
2070         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2071         __u64 key;
2072         __u64 ptr;
2073         union nilfs_bmap_ptr_req req;
2074         int ret;
2075
2076         parent = nilfs_btree_get_node(btree, path, level + 1);
2077         ptr = nilfs_btree_node_get_ptr(btree, parent,
2078                                        path[level + 1].bp_index);
2079         req.bpr_ptr = ptr;
2080         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2081         if (ret < 0)
2082                 return ret;
2083         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2084
2085         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2086         /* on-disk format */
2087         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2088         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2089
2090         return 0;
2091 }
2092
2093 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2094                               struct buffer_head **bh,
2095                               sector_t blocknr,
2096                               union nilfs_binfo *binfo)
2097 {
2098         struct nilfs_btree *btree;
2099         struct nilfs_btree_path *path;
2100         struct nilfs_btree_node *node;
2101         __u64 key;
2102         int level, ret;
2103
2104         btree = (struct nilfs_btree *)bmap;
2105         path = nilfs_btree_alloc_path();
2106         if (path == NULL)
2107                 return -ENOMEM;
2108
2109         if (buffer_nilfs_node(*bh)) {
2110                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2111                 key = nilfs_btree_node_get_key(node, 0);
2112                 level = nilfs_btree_node_get_level(node);
2113         } else {
2114                 key = nilfs_bmap_data_get_key(bmap, *bh);
2115                 level = NILFS_BTREE_LEVEL_DATA;
2116         }
2117
2118         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2119         if (ret < 0) {
2120                 WARN_ON(ret == -ENOENT);
2121                 goto out;
2122         }
2123
2124         ret = NILFS_BMAP_USE_VBN(bmap) ?
2125                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2126                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2127
2128  out:
2129         nilfs_btree_release_path(path);
2130         nilfs_btree_free_path(path);
2131
2132         return ret;
2133 }
2134
2135 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2136                                  struct buffer_head **bh,
2137                                  sector_t blocknr,
2138                                  union nilfs_binfo *binfo)
2139 {
2140         struct nilfs_btree_node *node;
2141         __u64 key;
2142         int ret;
2143
2144         ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2145                              blocknr);
2146         if (ret < 0)
2147                 return ret;
2148
2149         if (buffer_nilfs_node(*bh)) {
2150                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2151                 key = nilfs_btree_node_get_key(node, 0);
2152         } else
2153                 key = nilfs_bmap_data_get_key(bmap, *bh);
2154
2155         /* on-disk format */
2156         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2157         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2158
2159         return 0;
2160 }
2161
2162 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2163 {
2164         struct buffer_head *bh;
2165         struct nilfs_btree *btree;
2166         struct nilfs_btree_path *path;
2167         __u64 ptr;
2168         int ret;
2169
2170         btree = (struct nilfs_btree *)bmap;
2171         path = nilfs_btree_alloc_path();
2172         if (path == NULL)
2173                 return -ENOMEM;
2174
2175         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2176         if (ret < 0) {
2177                 WARN_ON(ret == -ENOENT);
2178                 goto out;
2179         }
2180         ret = nilfs_btree_get_block(btree, ptr, &bh);
2181         if (ret < 0) {
2182                 WARN_ON(ret == -ENOENT);
2183                 goto out;
2184         }
2185
2186         if (!buffer_dirty(bh))
2187                 nilfs_btnode_mark_dirty(bh);
2188         brelse(bh);
2189         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2190                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2191
2192  out:
2193         nilfs_btree_release_path(path);
2194         nilfs_btree_free_path(path);
2195         return ret;
2196 }
2197
2198 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2199         .bop_lookup             =       nilfs_btree_lookup,
2200         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2201         .bop_insert             =       nilfs_btree_insert,
2202         .bop_delete             =       nilfs_btree_delete,
2203         .bop_clear              =       NULL,
2204
2205         .bop_propagate          =       nilfs_btree_propagate,
2206
2207         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2208
2209         .bop_assign             =       nilfs_btree_assign,
2210         .bop_mark               =       nilfs_btree_mark,
2211
2212         .bop_last_key           =       nilfs_btree_last_key,
2213         .bop_check_insert       =       NULL,
2214         .bop_check_delete       =       nilfs_btree_check_delete,
2215         .bop_gather_data        =       nilfs_btree_gather_data,
2216 };
2217
2218 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2219         .bop_lookup             =       NULL,
2220         .bop_lookup_contig      =       NULL,
2221         .bop_insert             =       NULL,
2222         .bop_delete             =       NULL,
2223         .bop_clear              =       NULL,
2224
2225         .bop_propagate          =       nilfs_btree_propagate_gc,
2226
2227         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2228
2229         .bop_assign             =       nilfs_btree_assign_gc,
2230         .bop_mark               =       NULL,
2231
2232         .bop_last_key           =       NULL,
2233         .bop_check_insert       =       NULL,
2234         .bop_check_delete       =       NULL,
2235         .bop_gather_data        =       NULL,
2236 };
2237
2238 int nilfs_btree_init(struct nilfs_bmap *bmap)
2239 {
2240         bmap->b_ops = &nilfs_btree_ops;
2241         return 0;
2242 }
2243
2244 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2245 {
2246         bmap->b_ops = &nilfs_btree_ops_gc;
2247 }