nilfs2: get rid of BUG_ON use in btree lookup routines
[safe/jmp/linux-2.6] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 /**
35  * struct nilfs_btree_path - A path on which B-tree operations are executed
36  * @bp_bh: buffer head of node block
37  * @bp_sib_bh: buffer head of sibling node block
38  * @bp_index: index of child node
39  * @bp_oldreq: ptr end request for old ptr
40  * @bp_newreq: ptr alloc request for new ptr
41  * @bp_op: rebalance operation
42  */
43 struct nilfs_btree_path {
44         struct buffer_head *bp_bh;
45         struct buffer_head *bp_sib_bh;
46         int bp_index;
47         union nilfs_bmap_ptr_req bp_oldreq;
48         union nilfs_bmap_ptr_req bp_newreq;
49         struct nilfs_btnode_chkey_ctxt bp_ctxt;
50         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51                       int, __u64 *, __u64 *);
52 };
53
54 /*
55  * B-tree path operations
56  */
57
58 static struct kmem_cache *nilfs_btree_path_cache;
59
60 int __init nilfs_btree_path_cache_init(void)
61 {
62         nilfs_btree_path_cache =
63                 kmem_cache_create("nilfs2_btree_path_cache",
64                                   sizeof(struct nilfs_btree_path) *
65                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67 }
68
69 void nilfs_btree_path_cache_destroy(void)
70 {
71         kmem_cache_destroy(nilfs_btree_path_cache);
72 }
73
74 static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75 {
76         return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
77 }
78
79 static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
80 {
81         kmem_cache_free(nilfs_btree_path_cache, path);
82 }
83
84 static void nilfs_btree_init_path(struct nilfs_btree_path *path)
85 {
86         int level;
87
88         for (level = NILFS_BTREE_LEVEL_DATA;
89              level < NILFS_BTREE_LEVEL_MAX;
90              level++) {
91                 path[level].bp_bh = NULL;
92                 path[level].bp_sib_bh = NULL;
93                 path[level].bp_index = 0;
94                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
95                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
96                 path[level].bp_op = NULL;
97         }
98 }
99
100 static void nilfs_btree_release_path(struct nilfs_btree_path *path)
101 {
102         int level;
103
104         for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
105              level++)
106                 brelse(path[level].bp_bh);
107 }
108
109 /*
110  * B-tree node operations
111  */
112 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
113                                  struct buffer_head **bhp)
114 {
115         struct address_space *btnc =
116                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
117         return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
118 }
119
120 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
121                                      __u64 ptr, struct buffer_head **bhp)
122 {
123         struct address_space *btnc =
124                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
125         int ret;
126
127         ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
128         if (!ret)
129                 set_buffer_nilfs_volatile(*bhp);
130         return ret;
131 }
132
133 static inline int
134 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
135 {
136         return node->bn_flags;
137 }
138
139 static inline void
140 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
141 {
142         node->bn_flags = flags;
143 }
144
145 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
146 {
147         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
148 }
149
150 static inline int
151 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
152 {
153         return node->bn_level;
154 }
155
156 static inline void
157 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
158 {
159         node->bn_level = level;
160 }
161
162 static inline int
163 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
164 {
165         return le16_to_cpu(node->bn_nchildren);
166 }
167
168 static inline void
169 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
170 {
171         node->bn_nchildren = cpu_to_le16(nchildren);
172 }
173
174 static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
175 {
176         return 1 << btree->bt_bmap.b_inode->i_blkbits;
177 }
178
179 static inline int
180 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
181                                const struct nilfs_btree *btree)
182 {
183         return nilfs_btree_node_root(node) ?
184                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
185                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
186 }
187
188 static inline int
189 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
190                                const struct nilfs_btree *btree)
191 {
192         return nilfs_btree_node_root(node) ?
193                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
194                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
195 }
196
197 static inline __le64 *
198 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
199 {
200         return (__le64 *)((char *)(node + 1) +
201                           (nilfs_btree_node_root(node) ?
202                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
203 }
204
205 static inline __le64 *
206 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
207                        const struct nilfs_btree *btree)
208 {
209         return (__le64 *)(nilfs_btree_node_dkeys(node) +
210                           nilfs_btree_node_nchildren_max(node, btree));
211 }
212
213 static inline __u64
214 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
215 {
216         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
217 }
218
219 static inline void
220 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
221 {
222         *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
223 }
224
225 static inline __u64
226 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
227                          const struct nilfs_btree_node *node, int index)
228 {
229         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
230                                         index));
231 }
232
233 static inline void
234 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
235                          struct nilfs_btree_node *node, int index, __u64 ptr)
236 {
237         *(nilfs_btree_node_dptrs(node, btree) + index) =
238                 nilfs_bmap_ptr_to_dptr(ptr);
239 }
240
241 static void nilfs_btree_node_init(struct nilfs_btree *btree,
242                                   struct nilfs_btree_node *node,
243                                   int flags, int level, int nchildren,
244                                   const __u64 *keys, const __u64 *ptrs)
245 {
246         __le64 *dkeys;
247         __le64 *dptrs;
248         int i;
249
250         nilfs_btree_node_set_flags(node, flags);
251         nilfs_btree_node_set_level(node, level);
252         nilfs_btree_node_set_nchildren(node, nchildren);
253
254         dkeys = nilfs_btree_node_dkeys(node);
255         dptrs = nilfs_btree_node_dptrs(node, btree);
256         for (i = 0; i < nchildren; i++) {
257                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
258                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
259         }
260 }
261
262 /* Assume the buffer heads corresponding to left and right are locked. */
263 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
264                                        struct nilfs_btree_node *left,
265                                        struct nilfs_btree_node *right,
266                                        int n)
267 {
268         __le64 *ldkeys, *rdkeys;
269         __le64 *ldptrs, *rdptrs;
270         int lnchildren, rnchildren;
271
272         ldkeys = nilfs_btree_node_dkeys(left);
273         ldptrs = nilfs_btree_node_dptrs(left, btree);
274         lnchildren = nilfs_btree_node_get_nchildren(left);
275
276         rdkeys = nilfs_btree_node_dkeys(right);
277         rdptrs = nilfs_btree_node_dptrs(right, btree);
278         rnchildren = nilfs_btree_node_get_nchildren(right);
279
280         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
281         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
282         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
283         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
284
285         lnchildren += n;
286         rnchildren -= n;
287         nilfs_btree_node_set_nchildren(left, lnchildren);
288         nilfs_btree_node_set_nchildren(right, rnchildren);
289 }
290
291 /* Assume that the buffer heads corresponding to left and right are locked. */
292 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
293                                         struct nilfs_btree_node *left,
294                                         struct nilfs_btree_node *right,
295                                         int n)
296 {
297         __le64 *ldkeys, *rdkeys;
298         __le64 *ldptrs, *rdptrs;
299         int lnchildren, rnchildren;
300
301         ldkeys = nilfs_btree_node_dkeys(left);
302         ldptrs = nilfs_btree_node_dptrs(left, btree);
303         lnchildren = nilfs_btree_node_get_nchildren(left);
304
305         rdkeys = nilfs_btree_node_dkeys(right);
306         rdptrs = nilfs_btree_node_dptrs(right, btree);
307         rnchildren = nilfs_btree_node_get_nchildren(right);
308
309         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
310         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
311         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
312         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
313
314         lnchildren -= n;
315         rnchildren += n;
316         nilfs_btree_node_set_nchildren(left, lnchildren);
317         nilfs_btree_node_set_nchildren(right, rnchildren);
318 }
319
320 /* Assume that the buffer head corresponding to node is locked. */
321 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
322                                     struct nilfs_btree_node *node,
323                                     __u64 key, __u64 ptr, int index)
324 {
325         __le64 *dkeys;
326         __le64 *dptrs;
327         int nchildren;
328
329         dkeys = nilfs_btree_node_dkeys(node);
330         dptrs = nilfs_btree_node_dptrs(node, btree);
331         nchildren = nilfs_btree_node_get_nchildren(node);
332         if (index < nchildren) {
333                 memmove(dkeys + index + 1, dkeys + index,
334                         (nchildren - index) * sizeof(*dkeys));
335                 memmove(dptrs + index + 1, dptrs + index,
336                         (nchildren - index) * sizeof(*dptrs));
337         }
338         dkeys[index] = nilfs_bmap_key_to_dkey(key);
339         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
340         nchildren++;
341         nilfs_btree_node_set_nchildren(node, nchildren);
342 }
343
344 /* Assume that the buffer head corresponding to node is locked. */
345 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
346                                     struct nilfs_btree_node *node,
347                                     __u64 *keyp, __u64 *ptrp, int index)
348 {
349         __u64 key;
350         __u64 ptr;
351         __le64 *dkeys;
352         __le64 *dptrs;
353         int nchildren;
354
355         dkeys = nilfs_btree_node_dkeys(node);
356         dptrs = nilfs_btree_node_dptrs(node, btree);
357         key = nilfs_bmap_dkey_to_key(dkeys[index]);
358         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
359         nchildren = nilfs_btree_node_get_nchildren(node);
360         if (keyp != NULL)
361                 *keyp = key;
362         if (ptrp != NULL)
363                 *ptrp = ptr;
364
365         if (index < nchildren - 1) {
366                 memmove(dkeys + index, dkeys + index + 1,
367                         (nchildren - index - 1) * sizeof(*dkeys));
368                 memmove(dptrs + index, dptrs + index + 1,
369                         (nchildren - index - 1) * sizeof(*dptrs));
370         }
371         nchildren--;
372         nilfs_btree_node_set_nchildren(node, nchildren);
373 }
374
375 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
376                                    __u64 key, int *indexp)
377 {
378         __u64 nkey;
379         int index, low, high, s;
380
381         /* binary search */
382         low = 0;
383         high = nilfs_btree_node_get_nchildren(node) - 1;
384         index = 0;
385         s = 0;
386         while (low <= high) {
387                 index = (low + high) / 2;
388                 nkey = nilfs_btree_node_get_key(node, index);
389                 if (nkey == key) {
390                         s = 0;
391                         goto out;
392                 } else if (nkey < key) {
393                         low = index + 1;
394                         s = -1;
395                 } else {
396                         high = index - 1;
397                         s = 1;
398                 }
399         }
400
401         /* adjust index */
402         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
403                 if (s > 0 && index > 0)
404                         index--;
405         } else if (s < 0)
406                 index++;
407
408  out:
409         *indexp = index;
410
411         return s == 0;
412 }
413
414 static inline struct nilfs_btree_node *
415 nilfs_btree_get_root(const struct nilfs_btree *btree)
416 {
417         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
418 }
419
420 static inline struct nilfs_btree_node *
421 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
422 {
423         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
424 }
425
426 static inline struct nilfs_btree_node *
427 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
428 {
429         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
430 }
431
432 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
433 {
434         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
435 }
436
437 static inline struct nilfs_btree_node *
438 nilfs_btree_get_node(const struct nilfs_btree *btree,
439                      const struct nilfs_btree_path *path,
440                      int level)
441 {
442         return (level == nilfs_btree_height(btree) - 1) ?
443                 nilfs_btree_get_root(btree) :
444                 nilfs_btree_get_nonroot_node(path, level);
445 }
446
447 static inline int
448 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
449 {
450         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
451                 dump_stack();
452                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
453                        nilfs_btree_node_get_level(node), level);
454                 return 1;
455         }
456         return 0;
457 }
458
459 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
460                                  struct nilfs_btree_path *path,
461                                  __u64 key, __u64 *ptrp, int minlevel)
462 {
463         struct nilfs_btree_node *node;
464         __u64 ptr;
465         int level, index, found, ret;
466
467         node = nilfs_btree_get_root(btree);
468         level = nilfs_btree_node_get_level(node);
469         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
470                 return -ENOENT;
471
472         found = nilfs_btree_node_lookup(node, key, &index);
473         ptr = nilfs_btree_node_get_ptr(btree, node, index);
474         path[level].bp_bh = NULL;
475         path[level].bp_index = index;
476
477         for (level--; level >= minlevel; level--) {
478                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
479                 if (ret < 0)
480                         return ret;
481                 node = nilfs_btree_get_nonroot_node(path, level);
482                 if (nilfs_btree_bad_node(node, level))
483                         return -EINVAL;
484                 if (!found)
485                         found = nilfs_btree_node_lookup(node, key, &index);
486                 else
487                         index = 0;
488                 if (index < nilfs_btree_node_nchildren_max(node, btree))
489                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
490                 else {
491                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
492                         /* insert */
493                         ptr = NILFS_BMAP_INVALID_PTR;
494                 }
495                 path[level].bp_index = index;
496         }
497         if (!found)
498                 return -ENOENT;
499
500         if (ptrp != NULL)
501                 *ptrp = ptr;
502
503         return 0;
504 }
505
506 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
507                                       struct nilfs_btree_path *path,
508                                       __u64 *keyp, __u64 *ptrp)
509 {
510         struct nilfs_btree_node *node;
511         __u64 ptr;
512         int index, level, ret;
513
514         node = nilfs_btree_get_root(btree);
515         index = nilfs_btree_node_get_nchildren(node) - 1;
516         if (index < 0)
517                 return -ENOENT;
518         level = nilfs_btree_node_get_level(node);
519         ptr = nilfs_btree_node_get_ptr(btree, node, index);
520         path[level].bp_bh = NULL;
521         path[level].bp_index = index;
522
523         for (level--; level > 0; level--) {
524                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
525                 if (ret < 0)
526                         return ret;
527                 node = nilfs_btree_get_nonroot_node(path, level);
528                 if (nilfs_btree_bad_node(node, level))
529                         return -EINVAL;
530                 index = nilfs_btree_node_get_nchildren(node) - 1;
531                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
532                 path[level].bp_index = index;
533         }
534
535         if (keyp != NULL)
536                 *keyp = nilfs_btree_node_get_key(node, index);
537         if (ptrp != NULL)
538                 *ptrp = ptr;
539
540         return 0;
541 }
542
543 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
544                               __u64 key, int level, __u64 *ptrp)
545 {
546         struct nilfs_btree *btree;
547         struct nilfs_btree_path *path;
548         __u64 ptr;
549         int ret;
550
551         btree = (struct nilfs_btree *)bmap;
552         path = nilfs_btree_alloc_path();
553         if (path == NULL)
554                 return -ENOMEM;
555         nilfs_btree_init_path(path);
556
557         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
558
559         if (ptrp != NULL)
560                 *ptrp = ptr;
561
562         nilfs_btree_release_path(path);
563         nilfs_btree_free_path(path);
564
565         return ret;
566 }
567
568 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
569                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
570 {
571         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
572         struct nilfs_btree_path *path;
573         struct nilfs_btree_node *node;
574         struct inode *dat = NULL;
575         __u64 ptr, ptr2;
576         sector_t blocknr;
577         int level = NILFS_BTREE_LEVEL_NODE_MIN;
578         int ret, cnt, index, maxlevel;
579
580         path = nilfs_btree_alloc_path();
581         if (path == NULL)
582                 return -ENOMEM;
583         nilfs_btree_init_path(path);
584         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
585         if (ret < 0)
586                 goto out;
587
588         if (NILFS_BMAP_USE_VBN(bmap)) {
589                 dat = nilfs_bmap_get_dat(bmap);
590                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
591                 if (ret < 0)
592                         goto out;
593                 ptr = blocknr;
594         }
595         cnt = 1;
596         if (cnt == maxblocks)
597                 goto end;
598
599         maxlevel = nilfs_btree_height(btree) - 1;
600         node = nilfs_btree_get_node(btree, path, level);
601         index = path[level].bp_index + 1;
602         for (;;) {
603                 while (index < nilfs_btree_node_get_nchildren(node)) {
604                         if (nilfs_btree_node_get_key(node, index) !=
605                             key + cnt)
606                                 goto end;
607                         ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
608                         if (dat) {
609                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
610                                 if (ret < 0)
611                                         goto out;
612                                 ptr2 = blocknr;
613                         }
614                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
615                                 goto end;
616                         index++;
617                         continue;
618                 }
619                 if (level == maxlevel)
620                         break;
621
622                 /* look-up right sibling node */
623                 node = nilfs_btree_get_node(btree, path, level + 1);
624                 index = path[level + 1].bp_index + 1;
625                 if (index >= nilfs_btree_node_get_nchildren(node) ||
626                     nilfs_btree_node_get_key(node, index) != key + cnt)
627                         break;
628                 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
629                 path[level + 1].bp_index = index;
630
631                 brelse(path[level].bp_bh);
632                 path[level].bp_bh = NULL;
633                 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
634                 if (ret < 0)
635                         goto out;
636                 node = nilfs_btree_get_nonroot_node(path, level);
637                 index = 0;
638                 path[level].bp_index = index;
639         }
640  end:
641         *ptrp = ptr;
642         ret = cnt;
643  out:
644         nilfs_btree_release_path(path);
645         nilfs_btree_free_path(path);
646         return ret;
647 }
648
649 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
650                                     struct nilfs_btree_path *path,
651                                     int level, __u64 key)
652 {
653         if (level < nilfs_btree_height(btree) - 1) {
654                 do {
655                         lock_buffer(path[level].bp_bh);
656                         nilfs_btree_node_set_key(
657                                 nilfs_btree_get_nonroot_node(path, level),
658                                 path[level].bp_index, key);
659                         if (!buffer_dirty(path[level].bp_bh))
660                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
661                         unlock_buffer(path[level].bp_bh);
662                 } while ((path[level].bp_index == 0) &&
663                          (++level < nilfs_btree_height(btree) - 1));
664         }
665
666         /* root */
667         if (level == nilfs_btree_height(btree) - 1) {
668                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
669                                          path[level].bp_index, key);
670         }
671 }
672
673 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
674                                   struct nilfs_btree_path *path,
675                                   int level, __u64 *keyp, __u64 *ptrp)
676 {
677         struct nilfs_btree_node *node;
678
679         if (level < nilfs_btree_height(btree) - 1) {
680                 lock_buffer(path[level].bp_bh);
681                 node = nilfs_btree_get_nonroot_node(path, level);
682                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
683                                         path[level].bp_index);
684                 if (!buffer_dirty(path[level].bp_bh))
685                         nilfs_btnode_mark_dirty(path[level].bp_bh);
686                 unlock_buffer(path[level].bp_bh);
687
688                 if (path[level].bp_index == 0)
689                         nilfs_btree_promote_key(btree, path, level + 1,
690                                                 nilfs_btree_node_get_key(node,
691                                                                          0));
692         } else {
693                 node = nilfs_btree_get_root(btree);
694                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
695                                         path[level].bp_index);
696         }
697 }
698
699 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
700                                    struct nilfs_btree_path *path,
701                                    int level, __u64 *keyp, __u64 *ptrp)
702 {
703         struct nilfs_btree_node *node, *left;
704         int nchildren, lnchildren, n, move;
705
706         lock_buffer(path[level].bp_bh);
707         lock_buffer(path[level].bp_sib_bh);
708
709         node = nilfs_btree_get_nonroot_node(path, level);
710         left = nilfs_btree_get_sib_node(path, level);
711         nchildren = nilfs_btree_node_get_nchildren(node);
712         lnchildren = nilfs_btree_node_get_nchildren(left);
713         move = 0;
714
715         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
716         if (n > path[level].bp_index) {
717                 /* move insert point */
718                 n--;
719                 move = 1;
720         }
721
722         nilfs_btree_node_move_left(btree, left, node, n);
723
724         if (!buffer_dirty(path[level].bp_bh))
725                 nilfs_btnode_mark_dirty(path[level].bp_bh);
726         if (!buffer_dirty(path[level].bp_sib_bh))
727                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
728
729         unlock_buffer(path[level].bp_bh);
730         unlock_buffer(path[level].bp_sib_bh);
731
732         nilfs_btree_promote_key(btree, path, level + 1,
733                                 nilfs_btree_node_get_key(node, 0));
734
735         if (move) {
736                 brelse(path[level].bp_bh);
737                 path[level].bp_bh = path[level].bp_sib_bh;
738                 path[level].bp_sib_bh = NULL;
739                 path[level].bp_index += lnchildren;
740                 path[level + 1].bp_index--;
741         } else {
742                 brelse(path[level].bp_sib_bh);
743                 path[level].bp_sib_bh = NULL;
744                 path[level].bp_index -= n;
745         }
746
747         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
748 }
749
750 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
751                                     struct nilfs_btree_path *path,
752                                     int level, __u64 *keyp, __u64 *ptrp)
753 {
754         struct nilfs_btree_node *node, *right;
755         int nchildren, rnchildren, n, move;
756
757         lock_buffer(path[level].bp_bh);
758         lock_buffer(path[level].bp_sib_bh);
759
760         node = nilfs_btree_get_nonroot_node(path, level);
761         right = nilfs_btree_get_sib_node(path, level);
762         nchildren = nilfs_btree_node_get_nchildren(node);
763         rnchildren = nilfs_btree_node_get_nchildren(right);
764         move = 0;
765
766         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
767         if (n > nchildren - path[level].bp_index) {
768                 /* move insert point */
769                 n--;
770                 move = 1;
771         }
772
773         nilfs_btree_node_move_right(btree, node, right, n);
774
775         if (!buffer_dirty(path[level].bp_bh))
776                 nilfs_btnode_mark_dirty(path[level].bp_bh);
777         if (!buffer_dirty(path[level].bp_sib_bh))
778                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
779
780         unlock_buffer(path[level].bp_bh);
781         unlock_buffer(path[level].bp_sib_bh);
782
783         path[level + 1].bp_index++;
784         nilfs_btree_promote_key(btree, path, level + 1,
785                                 nilfs_btree_node_get_key(right, 0));
786         path[level + 1].bp_index--;
787
788         if (move) {
789                 brelse(path[level].bp_bh);
790                 path[level].bp_bh = path[level].bp_sib_bh;
791                 path[level].bp_sib_bh = NULL;
792                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
793                 path[level + 1].bp_index++;
794         } else {
795                 brelse(path[level].bp_sib_bh);
796                 path[level].bp_sib_bh = NULL;
797         }
798
799         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
800 }
801
802 static void nilfs_btree_split(struct nilfs_btree *btree,
803                               struct nilfs_btree_path *path,
804                               int level, __u64 *keyp, __u64 *ptrp)
805 {
806         struct nilfs_btree_node *node, *right;
807         __u64 newkey;
808         __u64 newptr;
809         int nchildren, n, move;
810
811         lock_buffer(path[level].bp_bh);
812         lock_buffer(path[level].bp_sib_bh);
813
814         node = nilfs_btree_get_nonroot_node(path, level);
815         right = nilfs_btree_get_sib_node(path, level);
816         nchildren = nilfs_btree_node_get_nchildren(node);
817         move = 0;
818
819         n = (nchildren + 1) / 2;
820         if (n > nchildren - path[level].bp_index) {
821                 n--;
822                 move = 1;
823         }
824
825         nilfs_btree_node_move_right(btree, node, right, n);
826
827         if (!buffer_dirty(path[level].bp_bh))
828                 nilfs_btnode_mark_dirty(path[level].bp_bh);
829         if (!buffer_dirty(path[level].bp_sib_bh))
830                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
831
832         unlock_buffer(path[level].bp_bh);
833         unlock_buffer(path[level].bp_sib_bh);
834
835         newkey = nilfs_btree_node_get_key(right, 0);
836         newptr = path[level].bp_newreq.bpr_ptr;
837
838         if (move) {
839                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
840                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
841                                         path[level].bp_index);
842
843                 *keyp = nilfs_btree_node_get_key(right, 0);
844                 *ptrp = path[level].bp_newreq.bpr_ptr;
845
846                 brelse(path[level].bp_bh);
847                 path[level].bp_bh = path[level].bp_sib_bh;
848                 path[level].bp_sib_bh = NULL;
849         } else {
850                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
851
852                 *keyp = nilfs_btree_node_get_key(right, 0);
853                 *ptrp = path[level].bp_newreq.bpr_ptr;
854
855                 brelse(path[level].bp_sib_bh);
856                 path[level].bp_sib_bh = NULL;
857         }
858
859         path[level + 1].bp_index++;
860 }
861
862 static void nilfs_btree_grow(struct nilfs_btree *btree,
863                              struct nilfs_btree_path *path,
864                              int level, __u64 *keyp, __u64 *ptrp)
865 {
866         struct nilfs_btree_node *root, *child;
867         int n;
868
869         lock_buffer(path[level].bp_sib_bh);
870
871         root = nilfs_btree_get_root(btree);
872         child = nilfs_btree_get_sib_node(path, level);
873
874         n = nilfs_btree_node_get_nchildren(root);
875
876         nilfs_btree_node_move_right(btree, root, child, n);
877         nilfs_btree_node_set_level(root, level + 1);
878
879         if (!buffer_dirty(path[level].bp_sib_bh))
880                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
881
882         unlock_buffer(path[level].bp_sib_bh);
883
884         path[level].bp_bh = path[level].bp_sib_bh;
885         path[level].bp_sib_bh = NULL;
886
887         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
888
889         *keyp = nilfs_btree_node_get_key(child, 0);
890         *ptrp = path[level].bp_newreq.bpr_ptr;
891 }
892
893 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
894                                    const struct nilfs_btree_path *path)
895 {
896         struct nilfs_btree_node *node;
897         int level;
898
899         if (path == NULL)
900                 return NILFS_BMAP_INVALID_PTR;
901
902         /* left sibling */
903         level = NILFS_BTREE_LEVEL_NODE_MIN;
904         if (path[level].bp_index > 0) {
905                 node = nilfs_btree_get_node(btree, path, level);
906                 return nilfs_btree_node_get_ptr(btree, node,
907                                                 path[level].bp_index - 1);
908         }
909
910         /* parent */
911         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
912         if (level <= nilfs_btree_height(btree) - 1) {
913                 node = nilfs_btree_get_node(btree, path, level);
914                 return nilfs_btree_node_get_ptr(btree, node,
915                                                 path[level].bp_index);
916         }
917
918         return NILFS_BMAP_INVALID_PTR;
919 }
920
921 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
922                                        const struct nilfs_btree_path *path,
923                                        __u64 key)
924 {
925         __u64 ptr;
926
927         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
928         if (ptr != NILFS_BMAP_INVALID_PTR)
929                 /* sequential access */
930                 return ptr;
931         else {
932                 ptr = nilfs_btree_find_near(btree, path);
933                 if (ptr != NILFS_BMAP_INVALID_PTR)
934                         /* near */
935                         return ptr;
936         }
937         /* block group */
938         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
939 }
940
941 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
942                                      __u64 ptr)
943 {
944         btree->bt_bmap.b_last_allocated_key = key;
945         btree->bt_bmap.b_last_allocated_ptr = ptr;
946 }
947
948 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
949                                       struct nilfs_btree_path *path,
950                                       int *levelp, __u64 key, __u64 ptr,
951                                       struct nilfs_bmap_stats *stats)
952 {
953         struct buffer_head *bh;
954         struct nilfs_btree_node *node, *parent, *sib;
955         __u64 sibptr;
956         int pindex, level, ret;
957         struct inode *dat = NULL;
958
959         stats->bs_nblocks = 0;
960         level = NILFS_BTREE_LEVEL_DATA;
961
962         /* allocate a new ptr for data block */
963         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
964                 path[level].bp_newreq.bpr_ptr =
965                         nilfs_btree_find_target_v(btree, path, key);
966                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
967         }
968
969         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
970                                            &path[level].bp_newreq, dat);
971         if (ret < 0)
972                 goto err_out_data;
973
974         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
975              level < nilfs_btree_height(btree) - 1;
976              level++) {
977                 node = nilfs_btree_get_nonroot_node(path, level);
978                 if (nilfs_btree_node_get_nchildren(node) <
979                     nilfs_btree_node_nchildren_max(node, btree)) {
980                         path[level].bp_op = nilfs_btree_do_insert;
981                         stats->bs_nblocks++;
982                         goto out;
983                 }
984
985                 parent = nilfs_btree_get_node(btree, path, level + 1);
986                 pindex = path[level + 1].bp_index;
987
988                 /* left sibling */
989                 if (pindex > 0) {
990                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
991                                                           pindex - 1);
992                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
993                         if (ret < 0)
994                                 goto err_out_child_node;
995                         sib = (struct nilfs_btree_node *)bh->b_data;
996                         if (nilfs_btree_node_get_nchildren(sib) <
997                             nilfs_btree_node_nchildren_max(sib, btree)) {
998                                 path[level].bp_sib_bh = bh;
999                                 path[level].bp_op = nilfs_btree_carry_left;
1000                                 stats->bs_nblocks++;
1001                                 goto out;
1002                         } else
1003                                 brelse(bh);
1004                 }
1005
1006                 /* right sibling */
1007                 if (pindex <
1008                     nilfs_btree_node_get_nchildren(parent) - 1) {
1009                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1010                                                           pindex + 1);
1011                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1012                         if (ret < 0)
1013                                 goto err_out_child_node;
1014                         sib = (struct nilfs_btree_node *)bh->b_data;
1015                         if (nilfs_btree_node_get_nchildren(sib) <
1016                             nilfs_btree_node_nchildren_max(sib, btree)) {
1017                                 path[level].bp_sib_bh = bh;
1018                                 path[level].bp_op = nilfs_btree_carry_right;
1019                                 stats->bs_nblocks++;
1020                                 goto out;
1021                         } else
1022                                 brelse(bh);
1023                 }
1024
1025                 /* split */
1026                 path[level].bp_newreq.bpr_ptr =
1027                         path[level - 1].bp_newreq.bpr_ptr + 1;
1028                 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1029                                                    &path[level].bp_newreq, dat);
1030                 if (ret < 0)
1031                         goto err_out_child_node;
1032                 ret = nilfs_btree_get_new_block(btree,
1033                                                 path[level].bp_newreq.bpr_ptr,
1034                                                 &bh);
1035                 if (ret < 0)
1036                         goto err_out_curr_node;
1037
1038                 stats->bs_nblocks++;
1039
1040                 lock_buffer(bh);
1041                 nilfs_btree_node_init(btree,
1042                                       (struct nilfs_btree_node *)bh->b_data,
1043                                       0, level, 0, NULL, NULL);
1044                 unlock_buffer(bh);
1045                 path[level].bp_sib_bh = bh;
1046                 path[level].bp_op = nilfs_btree_split;
1047         }
1048
1049         /* root */
1050         node = nilfs_btree_get_root(btree);
1051         if (nilfs_btree_node_get_nchildren(node) <
1052             nilfs_btree_node_nchildren_max(node, btree)) {
1053                 path[level].bp_op = nilfs_btree_do_insert;
1054                 stats->bs_nblocks++;
1055                 goto out;
1056         }
1057
1058         /* grow */
1059         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1060         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1061                                            &path[level].bp_newreq, dat);
1062         if (ret < 0)
1063                 goto err_out_child_node;
1064         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1065                                         &bh);
1066         if (ret < 0)
1067                 goto err_out_curr_node;
1068
1069         lock_buffer(bh);
1070         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1071                               0, level, 0, NULL, NULL);
1072         unlock_buffer(bh);
1073         path[level].bp_sib_bh = bh;
1074         path[level].bp_op = nilfs_btree_grow;
1075
1076         level++;
1077         path[level].bp_op = nilfs_btree_do_insert;
1078
1079         /* a newly-created node block and a data block are added */
1080         stats->bs_nblocks += 2;
1081
1082         /* success */
1083  out:
1084         *levelp = level;
1085         return ret;
1086
1087         /* error */
1088  err_out_curr_node:
1089         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1090                                    dat);
1091  err_out_child_node:
1092         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1093                 nilfs_btnode_delete(path[level].bp_sib_bh);
1094                 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1095                                            &path[level].bp_newreq, dat);
1096
1097         }
1098
1099         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1100                                    dat);
1101  err_out_data:
1102         *levelp = level;
1103         stats->bs_nblocks = 0;
1104         return ret;
1105 }
1106
1107 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1108                                       struct nilfs_btree_path *path,
1109                                       int maxlevel, __u64 key, __u64 ptr)
1110 {
1111         struct inode *dat = NULL;
1112         int level;
1113
1114         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1115         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1116         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1117                 nilfs_btree_set_target_v(btree, key, ptr);
1118                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1119         }
1120
1121         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1122                 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1123                                             &path[level - 1].bp_newreq, dat);
1124                 path[level].bp_op(btree, path, level, &key, &ptr);
1125         }
1126
1127         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1128                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1129 }
1130
1131 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1132 {
1133         struct nilfs_btree *btree;
1134         struct nilfs_btree_path *path;
1135         struct nilfs_bmap_stats stats;
1136         int level, ret;
1137
1138         btree = (struct nilfs_btree *)bmap;
1139         path = nilfs_btree_alloc_path();
1140         if (path == NULL)
1141                 return -ENOMEM;
1142         nilfs_btree_init_path(path);
1143
1144         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1145                                     NILFS_BTREE_LEVEL_NODE_MIN);
1146         if (ret != -ENOENT) {
1147                 if (ret == 0)
1148                         ret = -EEXIST;
1149                 goto out;
1150         }
1151
1152         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1153         if (ret < 0)
1154                 goto out;
1155         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1156         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1157
1158  out:
1159         nilfs_btree_release_path(path);
1160         nilfs_btree_free_path(path);
1161         return ret;
1162 }
1163
1164 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1165                                   struct nilfs_btree_path *path,
1166                                   int level, __u64 *keyp, __u64 *ptrp)
1167 {
1168         struct nilfs_btree_node *node;
1169
1170         if (level < nilfs_btree_height(btree) - 1) {
1171                 lock_buffer(path[level].bp_bh);
1172                 node = nilfs_btree_get_nonroot_node(path, level);
1173                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1174                                         path[level].bp_index);
1175                 if (!buffer_dirty(path[level].bp_bh))
1176                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1177                 unlock_buffer(path[level].bp_bh);
1178                 if (path[level].bp_index == 0)
1179                         nilfs_btree_promote_key(btree, path, level + 1,
1180                                 nilfs_btree_node_get_key(node, 0));
1181         } else {
1182                 node = nilfs_btree_get_root(btree);
1183                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1184                                         path[level].bp_index);
1185         }
1186 }
1187
1188 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1189                                     struct nilfs_btree_path *path,
1190                                     int level, __u64 *keyp, __u64 *ptrp)
1191 {
1192         struct nilfs_btree_node *node, *left;
1193         int nchildren, lnchildren, n;
1194
1195         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1196
1197         lock_buffer(path[level].bp_bh);
1198         lock_buffer(path[level].bp_sib_bh);
1199
1200         node = nilfs_btree_get_nonroot_node(path, level);
1201         left = nilfs_btree_get_sib_node(path, level);
1202         nchildren = nilfs_btree_node_get_nchildren(node);
1203         lnchildren = nilfs_btree_node_get_nchildren(left);
1204
1205         n = (nchildren + lnchildren) / 2 - nchildren;
1206
1207         nilfs_btree_node_move_right(btree, left, node, n);
1208
1209         if (!buffer_dirty(path[level].bp_bh))
1210                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1211         if (!buffer_dirty(path[level].bp_sib_bh))
1212                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1213
1214         unlock_buffer(path[level].bp_bh);
1215         unlock_buffer(path[level].bp_sib_bh);
1216
1217         nilfs_btree_promote_key(btree, path, level + 1,
1218                                 nilfs_btree_node_get_key(node, 0));
1219
1220         brelse(path[level].bp_sib_bh);
1221         path[level].bp_sib_bh = NULL;
1222         path[level].bp_index += n;
1223 }
1224
1225 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1226                                      struct nilfs_btree_path *path,
1227                                      int level, __u64 *keyp, __u64 *ptrp)
1228 {
1229         struct nilfs_btree_node *node, *right;
1230         int nchildren, rnchildren, n;
1231
1232         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1233
1234         lock_buffer(path[level].bp_bh);
1235         lock_buffer(path[level].bp_sib_bh);
1236
1237         node = nilfs_btree_get_nonroot_node(path, level);
1238         right = nilfs_btree_get_sib_node(path, level);
1239         nchildren = nilfs_btree_node_get_nchildren(node);
1240         rnchildren = nilfs_btree_node_get_nchildren(right);
1241
1242         n = (nchildren + rnchildren) / 2 - nchildren;
1243
1244         nilfs_btree_node_move_left(btree, node, right, n);
1245
1246         if (!buffer_dirty(path[level].bp_bh))
1247                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1248         if (!buffer_dirty(path[level].bp_sib_bh))
1249                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1250
1251         unlock_buffer(path[level].bp_bh);
1252         unlock_buffer(path[level].bp_sib_bh);
1253
1254         path[level + 1].bp_index++;
1255         nilfs_btree_promote_key(btree, path, level + 1,
1256                                 nilfs_btree_node_get_key(right, 0));
1257         path[level + 1].bp_index--;
1258
1259         brelse(path[level].bp_sib_bh);
1260         path[level].bp_sib_bh = NULL;
1261 }
1262
1263 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1264                                     struct nilfs_btree_path *path,
1265                                     int level, __u64 *keyp, __u64 *ptrp)
1266 {
1267         struct nilfs_btree_node *node, *left;
1268         int n;
1269
1270         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1271
1272         lock_buffer(path[level].bp_bh);
1273         lock_buffer(path[level].bp_sib_bh);
1274
1275         node = nilfs_btree_get_nonroot_node(path, level);
1276         left = nilfs_btree_get_sib_node(path, level);
1277
1278         n = nilfs_btree_node_get_nchildren(node);
1279
1280         nilfs_btree_node_move_left(btree, left, node, n);
1281
1282         if (!buffer_dirty(path[level].bp_sib_bh))
1283                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1284
1285         unlock_buffer(path[level].bp_bh);
1286         unlock_buffer(path[level].bp_sib_bh);
1287
1288         nilfs_btnode_delete(path[level].bp_bh);
1289         path[level].bp_bh = path[level].bp_sib_bh;
1290         path[level].bp_sib_bh = NULL;
1291         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1292 }
1293
1294 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1295                                      struct nilfs_btree_path *path,
1296                                      int level, __u64 *keyp, __u64 *ptrp)
1297 {
1298         struct nilfs_btree_node *node, *right;
1299         int n;
1300
1301         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1302
1303         lock_buffer(path[level].bp_bh);
1304         lock_buffer(path[level].bp_sib_bh);
1305
1306         node = nilfs_btree_get_nonroot_node(path, level);
1307         right = nilfs_btree_get_sib_node(path, level);
1308
1309         n = nilfs_btree_node_get_nchildren(right);
1310
1311         nilfs_btree_node_move_left(btree, node, right, n);
1312
1313         if (!buffer_dirty(path[level].bp_bh))
1314                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1315
1316         unlock_buffer(path[level].bp_bh);
1317         unlock_buffer(path[level].bp_sib_bh);
1318
1319         nilfs_btnode_delete(path[level].bp_sib_bh);
1320         path[level].bp_sib_bh = NULL;
1321         path[level + 1].bp_index++;
1322 }
1323
1324 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1325                                struct nilfs_btree_path *path,
1326                                int level, __u64 *keyp, __u64 *ptrp)
1327 {
1328         struct nilfs_btree_node *root, *child;
1329         int n;
1330
1331         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1332
1333         lock_buffer(path[level].bp_bh);
1334         root = nilfs_btree_get_root(btree);
1335         child = nilfs_btree_get_nonroot_node(path, level);
1336
1337         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1338         nilfs_btree_node_set_level(root, level);
1339         n = nilfs_btree_node_get_nchildren(child);
1340         nilfs_btree_node_move_left(btree, root, child, n);
1341         unlock_buffer(path[level].bp_bh);
1342
1343         nilfs_btnode_delete(path[level].bp_bh);
1344         path[level].bp_bh = NULL;
1345 }
1346
1347
1348 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1349                                       struct nilfs_btree_path *path,
1350                                       int *levelp,
1351                                       struct nilfs_bmap_stats *stats,
1352                                       struct inode *dat)
1353 {
1354         struct buffer_head *bh;
1355         struct nilfs_btree_node *node, *parent, *sib;
1356         __u64 sibptr;
1357         int pindex, level, ret;
1358
1359         ret = 0;
1360         stats->bs_nblocks = 0;
1361         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1362              level < nilfs_btree_height(btree) - 1;
1363              level++) {
1364                 node = nilfs_btree_get_nonroot_node(path, level);
1365                 path[level].bp_oldreq.bpr_ptr =
1366                         nilfs_btree_node_get_ptr(btree, node,
1367                                                  path[level].bp_index);
1368                 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1369                                                  &path[level].bp_oldreq, dat);
1370                 if (ret < 0)
1371                         goto err_out_child_node;
1372
1373                 if (nilfs_btree_node_get_nchildren(node) >
1374                     nilfs_btree_node_nchildren_min(node, btree)) {
1375                         path[level].bp_op = nilfs_btree_do_delete;
1376                         stats->bs_nblocks++;
1377                         goto out;
1378                 }
1379
1380                 parent = nilfs_btree_get_node(btree, path, level + 1);
1381                 pindex = path[level + 1].bp_index;
1382
1383                 if (pindex > 0) {
1384                         /* left sibling */
1385                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1386                                                           pindex - 1);
1387                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1388                         if (ret < 0)
1389                                 goto err_out_curr_node;
1390                         sib = (struct nilfs_btree_node *)bh->b_data;
1391                         if (nilfs_btree_node_get_nchildren(sib) >
1392                             nilfs_btree_node_nchildren_min(sib, btree)) {
1393                                 path[level].bp_sib_bh = bh;
1394                                 path[level].bp_op = nilfs_btree_borrow_left;
1395                                 stats->bs_nblocks++;
1396                                 goto out;
1397                         } else {
1398                                 path[level].bp_sib_bh = bh;
1399                                 path[level].bp_op = nilfs_btree_concat_left;
1400                                 stats->bs_nblocks++;
1401                                 /* continue; */
1402                         }
1403                 } else if (pindex <
1404                            nilfs_btree_node_get_nchildren(parent) - 1) {
1405                         /* right sibling */
1406                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1407                                                           pindex + 1);
1408                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1409                         if (ret < 0)
1410                                 goto err_out_curr_node;
1411                         sib = (struct nilfs_btree_node *)bh->b_data;
1412                         if (nilfs_btree_node_get_nchildren(sib) >
1413                             nilfs_btree_node_nchildren_min(sib, btree)) {
1414                                 path[level].bp_sib_bh = bh;
1415                                 path[level].bp_op = nilfs_btree_borrow_right;
1416                                 stats->bs_nblocks++;
1417                                 goto out;
1418                         } else {
1419                                 path[level].bp_sib_bh = bh;
1420                                 path[level].bp_op = nilfs_btree_concat_right;
1421                                 stats->bs_nblocks++;
1422                                 /* continue; */
1423                         }
1424                 } else {
1425                         /* no siblings */
1426                         /* the only child of the root node */
1427                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1428                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1429                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1430                                 path[level].bp_op = nilfs_btree_shrink;
1431                                 stats->bs_nblocks += 2;
1432                         } else {
1433                                 path[level].bp_op = nilfs_btree_do_delete;
1434                                 stats->bs_nblocks++;
1435                         }
1436
1437                         goto out;
1438
1439                 }
1440         }
1441
1442         node = nilfs_btree_get_root(btree);
1443         path[level].bp_oldreq.bpr_ptr =
1444                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1445
1446         ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1447                                          &path[level].bp_oldreq, dat);
1448         if (ret < 0)
1449                 goto err_out_child_node;
1450
1451         /* child of the root node is deleted */
1452         path[level].bp_op = nilfs_btree_do_delete;
1453         stats->bs_nblocks++;
1454
1455         /* success */
1456  out:
1457         *levelp = level;
1458         return ret;
1459
1460         /* error */
1461  err_out_curr_node:
1462         nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1463  err_out_child_node:
1464         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1465                 brelse(path[level].bp_sib_bh);
1466                 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1467                                          &path[level].bp_oldreq, dat);
1468         }
1469         *levelp = level;
1470         stats->bs_nblocks = 0;
1471         return ret;
1472 }
1473
1474 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1475                                       struct nilfs_btree_path *path,
1476                                       int maxlevel, struct inode *dat)
1477 {
1478         int level;
1479
1480         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1481                 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1482                                           &path[level].bp_oldreq, dat);
1483                 path[level].bp_op(btree, path, level, NULL, NULL);
1484         }
1485
1486         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1487                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1488 }
1489
1490 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1491
1492 {
1493         struct nilfs_btree *btree;
1494         struct nilfs_btree_path *path;
1495         struct nilfs_bmap_stats stats;
1496         struct inode *dat;
1497         int level, ret;
1498
1499         btree = (struct nilfs_btree *)bmap;
1500         path = nilfs_btree_alloc_path();
1501         if (path == NULL)
1502                 return -ENOMEM;
1503         nilfs_btree_init_path(path);
1504         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1505                                     NILFS_BTREE_LEVEL_NODE_MIN);
1506         if (ret < 0)
1507                 goto out;
1508
1509
1510         dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1511                 nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1512
1513         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1514         if (ret < 0)
1515                 goto out;
1516         nilfs_btree_commit_delete(btree, path, level, dat);
1517         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1518
1519 out:
1520         nilfs_btree_release_path(path);
1521         nilfs_btree_free_path(path);
1522         return ret;
1523 }
1524
1525 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1526 {
1527         struct nilfs_btree *btree;
1528         struct nilfs_btree_path *path;
1529         int ret;
1530
1531         btree = (struct nilfs_btree *)bmap;
1532         path = nilfs_btree_alloc_path();
1533         if (path == NULL)
1534                 return -ENOMEM;
1535         nilfs_btree_init_path(path);
1536
1537         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1538
1539         nilfs_btree_release_path(path);
1540         nilfs_btree_free_path(path);
1541
1542         return ret;
1543 }
1544
1545 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1546 {
1547         struct buffer_head *bh;
1548         struct nilfs_btree *btree;
1549         struct nilfs_btree_node *root, *node;
1550         __u64 maxkey, nextmaxkey;
1551         __u64 ptr;
1552         int nchildren, ret;
1553
1554         btree = (struct nilfs_btree *)bmap;
1555         root = nilfs_btree_get_root(btree);
1556         switch (nilfs_btree_height(btree)) {
1557         case 2:
1558                 bh = NULL;
1559                 node = root;
1560                 break;
1561         case 3:
1562                 nchildren = nilfs_btree_node_get_nchildren(root);
1563                 if (nchildren > 1)
1564                         return 0;
1565                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1566                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1567                 if (ret < 0)
1568                         return ret;
1569                 node = (struct nilfs_btree_node *)bh->b_data;
1570                 break;
1571         default:
1572                 return 0;
1573         }
1574
1575         nchildren = nilfs_btree_node_get_nchildren(node);
1576         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1577         nextmaxkey = (nchildren > 1) ?
1578                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1579         if (bh != NULL)
1580                 brelse(bh);
1581
1582         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1583 }
1584
1585 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1586                                    __u64 *keys, __u64 *ptrs, int nitems)
1587 {
1588         struct buffer_head *bh;
1589         struct nilfs_btree *btree;
1590         struct nilfs_btree_node *node, *root;
1591         __le64 *dkeys;
1592         __le64 *dptrs;
1593         __u64 ptr;
1594         int nchildren, i, ret;
1595
1596         btree = (struct nilfs_btree *)bmap;
1597         root = nilfs_btree_get_root(btree);
1598         switch (nilfs_btree_height(btree)) {
1599         case 2:
1600                 bh = NULL;
1601                 node = root;
1602                 break;
1603         case 3:
1604                 nchildren = nilfs_btree_node_get_nchildren(root);
1605                 WARN_ON(nchildren > 1);
1606                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1607                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1608                 if (ret < 0)
1609                         return ret;
1610                 node = (struct nilfs_btree_node *)bh->b_data;
1611                 break;
1612         default:
1613                 node = NULL;
1614                 return -EINVAL;
1615         }
1616
1617         nchildren = nilfs_btree_node_get_nchildren(node);
1618         if (nchildren < nitems)
1619                 nitems = nchildren;
1620         dkeys = nilfs_btree_node_dkeys(node);
1621         dptrs = nilfs_btree_node_dptrs(node, btree);
1622         for (i = 0; i < nitems; i++) {
1623                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1624                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1625         }
1626
1627         if (bh != NULL)
1628                 brelse(bh);
1629
1630         return nitems;
1631 }
1632
1633 static int
1634 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1635                                        union nilfs_bmap_ptr_req *dreq,
1636                                        union nilfs_bmap_ptr_req *nreq,
1637                                        struct buffer_head **bhp,
1638                                        struct nilfs_bmap_stats *stats)
1639 {
1640         struct buffer_head *bh;
1641         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1642         struct inode *dat = NULL;
1643         int ret;
1644
1645         stats->bs_nblocks = 0;
1646
1647         /* for data */
1648         /* cannot find near ptr */
1649         if (NILFS_BMAP_USE_VBN(bmap)) {
1650                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1651                 dat = nilfs_bmap_get_dat(bmap);
1652         }
1653
1654         ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1655         if (ret < 0)
1656                 return ret;
1657
1658         *bhp = NULL;
1659         stats->bs_nblocks++;
1660         if (nreq != NULL) {
1661                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1662                 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1663                 if (ret < 0)
1664                         goto err_out_dreq;
1665
1666                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1667                 if (ret < 0)
1668                         goto err_out_nreq;
1669
1670                 *bhp = bh;
1671                 stats->bs_nblocks++;
1672         }
1673
1674         /* success */
1675         return 0;
1676
1677         /* error */
1678  err_out_nreq:
1679         nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1680  err_out_dreq:
1681         nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1682         stats->bs_nblocks = 0;
1683         return ret;
1684
1685 }
1686
1687 static void
1688 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1689                                       __u64 key, __u64 ptr,
1690                                       const __u64 *keys, const __u64 *ptrs,
1691                                       int n,
1692                                       union nilfs_bmap_ptr_req *dreq,
1693                                       union nilfs_bmap_ptr_req *nreq,
1694                                       struct buffer_head *bh)
1695 {
1696         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1697         struct nilfs_btree_node *node;
1698         struct inode *dat;
1699         __u64 tmpptr;
1700
1701         /* free resources */
1702         if (bmap->b_ops->bop_clear != NULL)
1703                 bmap->b_ops->bop_clear(bmap);
1704
1705         /* ptr must be a pointer to a buffer head. */
1706         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1707
1708         /* convert and insert */
1709         dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1710         nilfs_btree_init(bmap);
1711         if (nreq != NULL) {
1712                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1713                 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1714
1715                 /* create child node at level 1 */
1716                 lock_buffer(bh);
1717                 node = (struct nilfs_btree_node *)bh->b_data;
1718                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1719                 nilfs_btree_node_insert(btree, node,
1720                                         key, dreq->bpr_ptr, n);
1721                 if (!buffer_dirty(bh))
1722                         nilfs_btnode_mark_dirty(bh);
1723                 if (!nilfs_bmap_dirty(bmap))
1724                         nilfs_bmap_set_dirty(bmap);
1725
1726                 unlock_buffer(bh);
1727                 brelse(bh);
1728
1729                 /* create root node at level 2 */
1730                 node = nilfs_btree_get_root(btree);
1731                 tmpptr = nreq->bpr_ptr;
1732                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1733                                       2, 1, &keys[0], &tmpptr);
1734         } else {
1735                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1736
1737                 /* create root node at level 1 */
1738                 node = nilfs_btree_get_root(btree);
1739                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1740                                       1, n, keys, ptrs);
1741                 nilfs_btree_node_insert(btree, node,
1742                                         key, dreq->bpr_ptr, n);
1743                 if (!nilfs_bmap_dirty(bmap))
1744                         nilfs_bmap_set_dirty(bmap);
1745         }
1746
1747         if (NILFS_BMAP_USE_VBN(bmap))
1748                 nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1749 }
1750
1751 /**
1752  * nilfs_btree_convert_and_insert -
1753  * @bmap:
1754  * @key:
1755  * @ptr:
1756  * @keys:
1757  * @ptrs:
1758  * @n:
1759  */
1760 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1761                                    __u64 key, __u64 ptr,
1762                                    const __u64 *keys, const __u64 *ptrs, int n)
1763 {
1764         struct buffer_head *bh;
1765         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1766         struct nilfs_bmap_stats stats;
1767         int ret;
1768
1769         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1770                 di = &dreq;
1771                 ni = NULL;
1772         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1773                            1 << bmap->b_inode->i_blkbits)) {
1774                 di = &dreq;
1775                 ni = &nreq;
1776         } else {
1777                 di = NULL;
1778                 ni = NULL;
1779                 BUG();
1780         }
1781
1782         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1783                                                      &stats);
1784         if (ret < 0)
1785                 return ret;
1786         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1787                                               di, ni, bh);
1788         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1789         return 0;
1790 }
1791
1792 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1793                                    struct nilfs_btree_path *path,
1794                                    int level,
1795                                    struct buffer_head *bh)
1796 {
1797         while ((++level < nilfs_btree_height(btree) - 1) &&
1798                !buffer_dirty(path[level].bp_bh))
1799                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1800
1801         return 0;
1802 }
1803
1804 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1805                                         struct nilfs_btree_path *path,
1806                                         int level, struct inode *dat)
1807 {
1808         struct nilfs_btree_node *parent;
1809         int ret;
1810
1811         parent = nilfs_btree_get_node(btree, path, level + 1);
1812         path[level].bp_oldreq.bpr_ptr =
1813                 nilfs_btree_node_get_ptr(btree, parent,
1814                                          path[level + 1].bp_index);
1815         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1816         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1817                                        &path[level].bp_newreq.bpr_req);
1818         if (ret < 0)
1819                 return ret;
1820
1821         if (buffer_nilfs_node(path[level].bp_bh)) {
1822                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1823                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1824                 path[level].bp_ctxt.bh = path[level].bp_bh;
1825                 ret = nilfs_btnode_prepare_change_key(
1826                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1827                         &path[level].bp_ctxt);
1828                 if (ret < 0) {
1829                         nilfs_dat_abort_update(dat,
1830                                                &path[level].bp_oldreq.bpr_req,
1831                                                &path[level].bp_newreq.bpr_req);
1832                         return ret;
1833                 }
1834         }
1835
1836         return 0;
1837 }
1838
1839 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1840                                         struct nilfs_btree_path *path,
1841                                         int level, struct inode *dat)
1842 {
1843         struct nilfs_btree_node *parent;
1844
1845         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1846                                 &path[level].bp_newreq.bpr_req,
1847                                 btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1848
1849         if (buffer_nilfs_node(path[level].bp_bh)) {
1850                 nilfs_btnode_commit_change_key(
1851                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1852                         &path[level].bp_ctxt);
1853                 path[level].bp_bh = path[level].bp_ctxt.bh;
1854         }
1855         set_buffer_nilfs_volatile(path[level].bp_bh);
1856
1857         parent = nilfs_btree_get_node(btree, path, level + 1);
1858         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1859                                  path[level].bp_newreq.bpr_ptr);
1860 }
1861
1862 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1863                                        struct nilfs_btree_path *path,
1864                                        int level, struct inode *dat)
1865 {
1866         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1867                                &path[level].bp_newreq.bpr_req);
1868         if (buffer_nilfs_node(path[level].bp_bh))
1869                 nilfs_btnode_abort_change_key(
1870                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1871                         &path[level].bp_ctxt);
1872 }
1873
1874 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1875                                            struct nilfs_btree_path *path,
1876                                            int minlevel, int *maxlevelp,
1877                                            struct inode *dat)
1878 {
1879         int level, ret;
1880
1881         level = minlevel;
1882         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1883                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1884                 if (ret < 0)
1885                         return ret;
1886         }
1887         while ((++level < nilfs_btree_height(btree) - 1) &&
1888                !buffer_dirty(path[level].bp_bh)) {
1889
1890                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1891                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1892                 if (ret < 0)
1893                         goto out;
1894         }
1895
1896         /* success */
1897         *maxlevelp = level - 1;
1898         return 0;
1899
1900         /* error */
1901  out:
1902         while (--level > minlevel)
1903                 nilfs_btree_abort_update_v(btree, path, level, dat);
1904         if (!buffer_nilfs_volatile(path[level].bp_bh))
1905                 nilfs_btree_abort_update_v(btree, path, level, dat);
1906         return ret;
1907 }
1908
1909 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1910                                            struct nilfs_btree_path *path,
1911                                            int minlevel, int maxlevel,
1912                                            struct buffer_head *bh,
1913                                            struct inode *dat)
1914 {
1915         int level;
1916
1917         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1918                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1919
1920         for (level = minlevel + 1; level <= maxlevel; level++)
1921                 nilfs_btree_commit_update_v(btree, path, level, dat);
1922 }
1923
1924 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1925                                    struct nilfs_btree_path *path,
1926                                    int level, struct buffer_head *bh)
1927 {
1928         int maxlevel, ret;
1929         struct nilfs_btree_node *parent;
1930         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1931         __u64 ptr;
1932
1933         get_bh(bh);
1934         path[level].bp_bh = bh;
1935         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1936                                               dat);
1937         if (ret < 0)
1938                 goto out;
1939
1940         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1941                 parent = nilfs_btree_get_node(btree, path, level + 1);
1942                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1943                                                path[level + 1].bp_index);
1944                 ret = nilfs_dat_mark_dirty(dat, ptr);
1945                 if (ret < 0)
1946                         goto out;
1947         }
1948
1949         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1950
1951  out:
1952         brelse(path[level].bp_bh);
1953         path[level].bp_bh = NULL;
1954         return ret;
1955 }
1956
1957 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1958                                  struct buffer_head *bh)
1959 {
1960         struct nilfs_btree *btree;
1961         struct nilfs_btree_path *path;
1962         struct nilfs_btree_node *node;
1963         __u64 key;
1964         int level, ret;
1965
1966         WARN_ON(!buffer_dirty(bh));
1967
1968         btree = (struct nilfs_btree *)bmap;
1969         path = nilfs_btree_alloc_path();
1970         if (path == NULL)
1971                 return -ENOMEM;
1972         nilfs_btree_init_path(path);
1973
1974         if (buffer_nilfs_node(bh)) {
1975                 node = (struct nilfs_btree_node *)bh->b_data;
1976                 key = nilfs_btree_node_get_key(node, 0);
1977                 level = nilfs_btree_node_get_level(node);
1978         } else {
1979                 key = nilfs_bmap_data_get_key(bmap, bh);
1980                 level = NILFS_BTREE_LEVEL_DATA;
1981         }
1982
1983         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1984         if (ret < 0) {
1985                 if (unlikely(ret == -ENOENT))
1986                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1987                                __func__, (unsigned long long)key, level);
1988                 goto out;
1989         }
1990
1991         ret = NILFS_BMAP_USE_VBN(bmap) ?
1992                 nilfs_btree_propagate_v(btree, path, level, bh) :
1993                 nilfs_btree_propagate_p(btree, path, level, bh);
1994
1995  out:
1996         nilfs_btree_release_path(path);
1997         nilfs_btree_free_path(path);
1998
1999         return ret;
2000 }
2001
2002 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
2003                                     struct buffer_head *bh)
2004 {
2005         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
2006 }
2007
2008 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
2009                                          struct list_head *lists,
2010                                          struct buffer_head *bh)
2011 {
2012         struct list_head *head;
2013         struct buffer_head *cbh;
2014         struct nilfs_btree_node *node, *cnode;
2015         __u64 key, ckey;
2016         int level;
2017
2018         get_bh(bh);
2019         node = (struct nilfs_btree_node *)bh->b_data;
2020         key = nilfs_btree_node_get_key(node, 0);
2021         level = nilfs_btree_node_get_level(node);
2022         list_for_each(head, &lists[level]) {
2023                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2024                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2025                 ckey = nilfs_btree_node_get_key(cnode, 0);
2026                 if (key < ckey)
2027                         break;
2028         }
2029         list_add_tail(&bh->b_assoc_buffers, head);
2030 }
2031
2032 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
2033                                              struct list_head *listp)
2034 {
2035         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
2036         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
2037         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2038         struct pagevec pvec;
2039         struct buffer_head *bh, *head;
2040         pgoff_t index = 0;
2041         int level, i;
2042
2043         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2044              level < NILFS_BTREE_LEVEL_MAX;
2045              level++)
2046                 INIT_LIST_HEAD(&lists[level]);
2047
2048         pagevec_init(&pvec, 0);
2049
2050         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2051                                   PAGEVEC_SIZE)) {
2052                 for (i = 0; i < pagevec_count(&pvec); i++) {
2053                         bh = head = page_buffers(pvec.pages[i]);
2054                         do {
2055                                 if (buffer_dirty(bh))
2056                                         nilfs_btree_add_dirty_buffer(btree,
2057                                                                      lists, bh);
2058                         } while ((bh = bh->b_this_page) != head);
2059                 }
2060                 pagevec_release(&pvec);
2061                 cond_resched();
2062         }
2063
2064         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2065              level < NILFS_BTREE_LEVEL_MAX;
2066              level++)
2067                 list_splice(&lists[level], listp->prev);
2068 }
2069
2070 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2071                                 struct nilfs_btree_path *path,
2072                                 int level,
2073                                 struct buffer_head **bh,
2074                                 sector_t blocknr,
2075                                 union nilfs_binfo *binfo)
2076 {
2077         struct nilfs_btree_node *parent;
2078         __u64 key;
2079         __u64 ptr;
2080         int ret;
2081
2082         parent = nilfs_btree_get_node(btree, path, level + 1);
2083         ptr = nilfs_btree_node_get_ptr(btree, parent,
2084                                        path[level + 1].bp_index);
2085         if (buffer_nilfs_node(*bh)) {
2086                 path[level].bp_ctxt.oldkey = ptr;
2087                 path[level].bp_ctxt.newkey = blocknr;
2088                 path[level].bp_ctxt.bh = *bh;
2089                 ret = nilfs_btnode_prepare_change_key(
2090                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2091                         &path[level].bp_ctxt);
2092                 if (ret < 0)
2093                         return ret;
2094                 nilfs_btnode_commit_change_key(
2095                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2096                         &path[level].bp_ctxt);
2097                 *bh = path[level].bp_ctxt.bh;
2098         }
2099
2100         nilfs_btree_node_set_ptr(btree, parent,
2101                                  path[level + 1].bp_index, blocknr);
2102
2103         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2104         /* on-disk format */
2105         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2106         binfo->bi_dat.bi_level = level;
2107
2108         return 0;
2109 }
2110
2111 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2112                                 struct nilfs_btree_path *path,
2113                                 int level,
2114                                 struct buffer_head **bh,
2115                                 sector_t blocknr,
2116                                 union nilfs_binfo *binfo)
2117 {
2118         struct nilfs_btree_node *parent;
2119         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2120         __u64 key;
2121         __u64 ptr;
2122         union nilfs_bmap_ptr_req req;
2123         int ret;
2124
2125         parent = nilfs_btree_get_node(btree, path, level + 1);
2126         ptr = nilfs_btree_node_get_ptr(btree, parent,
2127                                        path[level + 1].bp_index);
2128         req.bpr_ptr = ptr;
2129         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2130         if (ret < 0)
2131                 return ret;
2132         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2133
2134         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2135         /* on-disk format */
2136         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2137         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2138
2139         return 0;
2140 }
2141
2142 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2143                               struct buffer_head **bh,
2144                               sector_t blocknr,
2145                               union nilfs_binfo *binfo)
2146 {
2147         struct nilfs_btree *btree;
2148         struct nilfs_btree_path *path;
2149         struct nilfs_btree_node *node;
2150         __u64 key;
2151         int level, ret;
2152
2153         btree = (struct nilfs_btree *)bmap;
2154         path = nilfs_btree_alloc_path();
2155         if (path == NULL)
2156                 return -ENOMEM;
2157         nilfs_btree_init_path(path);
2158
2159         if (buffer_nilfs_node(*bh)) {
2160                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2161                 key = nilfs_btree_node_get_key(node, 0);
2162                 level = nilfs_btree_node_get_level(node);
2163         } else {
2164                 key = nilfs_bmap_data_get_key(bmap, *bh);
2165                 level = NILFS_BTREE_LEVEL_DATA;
2166         }
2167
2168         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2169         if (ret < 0) {
2170                 WARN_ON(ret == -ENOENT);
2171                 goto out;
2172         }
2173
2174         ret = NILFS_BMAP_USE_VBN(bmap) ?
2175                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2176                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2177
2178  out:
2179         nilfs_btree_release_path(path);
2180         nilfs_btree_free_path(path);
2181
2182         return ret;
2183 }
2184
2185 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2186                                  struct buffer_head **bh,
2187                                  sector_t blocknr,
2188                                  union nilfs_binfo *binfo)
2189 {
2190         struct nilfs_btree_node *node;
2191         __u64 key;
2192         int ret;
2193
2194         ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2195                              blocknr);
2196         if (ret < 0)
2197                 return ret;
2198
2199         if (buffer_nilfs_node(*bh)) {
2200                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2201                 key = nilfs_btree_node_get_key(node, 0);
2202         } else
2203                 key = nilfs_bmap_data_get_key(bmap, *bh);
2204
2205         /* on-disk format */
2206         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2207         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2208
2209         return 0;
2210 }
2211
2212 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2213 {
2214         struct buffer_head *bh;
2215         struct nilfs_btree *btree;
2216         struct nilfs_btree_path *path;
2217         __u64 ptr;
2218         int ret;
2219
2220         btree = (struct nilfs_btree *)bmap;
2221         path = nilfs_btree_alloc_path();
2222         if (path == NULL)
2223                 return -ENOMEM;
2224         nilfs_btree_init_path(path);
2225
2226         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2227         if (ret < 0) {
2228                 WARN_ON(ret == -ENOENT);
2229                 goto out;
2230         }
2231         ret = nilfs_btree_get_block(btree, ptr, &bh);
2232         if (ret < 0) {
2233                 WARN_ON(ret == -ENOENT);
2234                 goto out;
2235         }
2236
2237         if (!buffer_dirty(bh))
2238                 nilfs_btnode_mark_dirty(bh);
2239         brelse(bh);
2240         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2241                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2242
2243  out:
2244         nilfs_btree_release_path(path);
2245         nilfs_btree_free_path(path);
2246         return ret;
2247 }
2248
2249 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2250         .bop_lookup             =       nilfs_btree_lookup,
2251         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2252         .bop_insert             =       nilfs_btree_insert,
2253         .bop_delete             =       nilfs_btree_delete,
2254         .bop_clear              =       NULL,
2255
2256         .bop_propagate          =       nilfs_btree_propagate,
2257
2258         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2259
2260         .bop_assign             =       nilfs_btree_assign,
2261         .bop_mark               =       nilfs_btree_mark,
2262
2263         .bop_last_key           =       nilfs_btree_last_key,
2264         .bop_check_insert       =       NULL,
2265         .bop_check_delete       =       nilfs_btree_check_delete,
2266         .bop_gather_data        =       nilfs_btree_gather_data,
2267 };
2268
2269 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2270         .bop_lookup             =       NULL,
2271         .bop_lookup_contig      =       NULL,
2272         .bop_insert             =       NULL,
2273         .bop_delete             =       NULL,
2274         .bop_clear              =       NULL,
2275
2276         .bop_propagate          =       nilfs_btree_propagate_gc,
2277
2278         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2279
2280         .bop_assign             =       nilfs_btree_assign_gc,
2281         .bop_mark               =       NULL,
2282
2283         .bop_last_key           =       NULL,
2284         .bop_check_insert       =       NULL,
2285         .bop_check_delete       =       NULL,
2286         .bop_gather_data        =       NULL,
2287 };
2288
2289 int nilfs_btree_init(struct nilfs_bmap *bmap)
2290 {
2291         bmap->b_ops = &nilfs_btree_ops;
2292         return 0;
2293 }
2294
2295 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2296 {
2297         bmap->b_ops = &nilfs_btree_ops_gc;
2298 }