nilfs2: move out mark_inode_dirty calls from bmap routines
[safe/jmp/linux-2.6] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 /**
35  * struct nilfs_btree_path - A path on which B-tree operations are executed
36  * @bp_bh: buffer head of node block
37  * @bp_sib_bh: buffer head of sibling node block
38  * @bp_index: index of child node
39  * @bp_oldreq: ptr end request for old ptr
40  * @bp_newreq: ptr alloc request for new ptr
41  * @bp_op: rebalance operation
42  */
43 struct nilfs_btree_path {
44         struct buffer_head *bp_bh;
45         struct buffer_head *bp_sib_bh;
46         int bp_index;
47         union nilfs_bmap_ptr_req bp_oldreq;
48         union nilfs_bmap_ptr_req bp_newreq;
49         struct nilfs_btnode_chkey_ctxt bp_ctxt;
50         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51                       int, __u64 *, __u64 *);
52 };
53
54 /*
55  * B-tree path operations
56  */
57
58 static struct kmem_cache *nilfs_btree_path_cache;
59
60 int __init nilfs_btree_path_cache_init(void)
61 {
62         nilfs_btree_path_cache =
63                 kmem_cache_create("nilfs2_btree_path_cache",
64                                   sizeof(struct nilfs_btree_path) *
65                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67 }
68
69 void nilfs_btree_path_cache_destroy(void)
70 {
71         kmem_cache_destroy(nilfs_btree_path_cache);
72 }
73
74 static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75 {
76         return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
77 }
78
79 static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
80 {
81         kmem_cache_free(nilfs_btree_path_cache, path);
82 }
83
84 static void nilfs_btree_init_path(struct nilfs_btree_path *path)
85 {
86         int level;
87
88         for (level = NILFS_BTREE_LEVEL_DATA;
89              level < NILFS_BTREE_LEVEL_MAX;
90              level++) {
91                 path[level].bp_bh = NULL;
92                 path[level].bp_sib_bh = NULL;
93                 path[level].bp_index = 0;
94                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
95                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
96                 path[level].bp_op = NULL;
97         }
98 }
99
100 static void nilfs_btree_release_path(struct nilfs_btree_path *path)
101 {
102         int level;
103
104         for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
105              level++)
106                 brelse(path[level].bp_bh);
107 }
108
109 /*
110  * B-tree node operations
111  */
112 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
113                                  struct buffer_head **bhp)
114 {
115         struct address_space *btnc =
116                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
117         return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
118 }
119
120 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
121                                      __u64 ptr, struct buffer_head **bhp)
122 {
123         struct address_space *btnc =
124                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
125         int ret;
126
127         ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
128         if (!ret)
129                 set_buffer_nilfs_volatile(*bhp);
130         return ret;
131 }
132
133 static inline int
134 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
135 {
136         return node->bn_flags;
137 }
138
139 static inline void
140 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
141 {
142         node->bn_flags = flags;
143 }
144
145 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
146 {
147         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
148 }
149
150 static inline int
151 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
152 {
153         return node->bn_level;
154 }
155
156 static inline void
157 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
158 {
159         node->bn_level = level;
160 }
161
162 static inline int
163 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
164 {
165         return le16_to_cpu(node->bn_nchildren);
166 }
167
168 static inline void
169 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
170 {
171         node->bn_nchildren = cpu_to_le16(nchildren);
172 }
173
174 static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
175 {
176         return 1 << btree->bt_bmap.b_inode->i_blkbits;
177 }
178
179 static inline int
180 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
181                                const struct nilfs_btree *btree)
182 {
183         return nilfs_btree_node_root(node) ?
184                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
185                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
186 }
187
188 static inline int
189 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
190                                const struct nilfs_btree *btree)
191 {
192         return nilfs_btree_node_root(node) ?
193                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
194                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
195 }
196
197 static inline __le64 *
198 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
199 {
200         return (__le64 *)((char *)(node + 1) +
201                           (nilfs_btree_node_root(node) ?
202                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
203 }
204
205 static inline __le64 *
206 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
207                        const struct nilfs_btree *btree)
208 {
209         return (__le64 *)(nilfs_btree_node_dkeys(node) +
210                           nilfs_btree_node_nchildren_max(node, btree));
211 }
212
213 static inline __u64
214 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
215 {
216         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
217 }
218
219 static inline void
220 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
221 {
222         *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
223 }
224
225 static inline __u64
226 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
227                          const struct nilfs_btree_node *node, int index)
228 {
229         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
230                                         index));
231 }
232
233 static inline void
234 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
235                          struct nilfs_btree_node *node, int index, __u64 ptr)
236 {
237         *(nilfs_btree_node_dptrs(node, btree) + index) =
238                 nilfs_bmap_ptr_to_dptr(ptr);
239 }
240
241 static void nilfs_btree_node_init(struct nilfs_btree *btree,
242                                   struct nilfs_btree_node *node,
243                                   int flags, int level, int nchildren,
244                                   const __u64 *keys, const __u64 *ptrs)
245 {
246         __le64 *dkeys;
247         __le64 *dptrs;
248         int i;
249
250         nilfs_btree_node_set_flags(node, flags);
251         nilfs_btree_node_set_level(node, level);
252         nilfs_btree_node_set_nchildren(node, nchildren);
253
254         dkeys = nilfs_btree_node_dkeys(node);
255         dptrs = nilfs_btree_node_dptrs(node, btree);
256         for (i = 0; i < nchildren; i++) {
257                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
258                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
259         }
260 }
261
262 /* Assume the buffer heads corresponding to left and right are locked. */
263 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
264                                        struct nilfs_btree_node *left,
265                                        struct nilfs_btree_node *right,
266                                        int n)
267 {
268         __le64 *ldkeys, *rdkeys;
269         __le64 *ldptrs, *rdptrs;
270         int lnchildren, rnchildren;
271
272         ldkeys = nilfs_btree_node_dkeys(left);
273         ldptrs = nilfs_btree_node_dptrs(left, btree);
274         lnchildren = nilfs_btree_node_get_nchildren(left);
275
276         rdkeys = nilfs_btree_node_dkeys(right);
277         rdptrs = nilfs_btree_node_dptrs(right, btree);
278         rnchildren = nilfs_btree_node_get_nchildren(right);
279
280         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
281         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
282         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
283         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
284
285         lnchildren += n;
286         rnchildren -= n;
287         nilfs_btree_node_set_nchildren(left, lnchildren);
288         nilfs_btree_node_set_nchildren(right, rnchildren);
289 }
290
291 /* Assume that the buffer heads corresponding to left and right are locked. */
292 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
293                                         struct nilfs_btree_node *left,
294                                         struct nilfs_btree_node *right,
295                                         int n)
296 {
297         __le64 *ldkeys, *rdkeys;
298         __le64 *ldptrs, *rdptrs;
299         int lnchildren, rnchildren;
300
301         ldkeys = nilfs_btree_node_dkeys(left);
302         ldptrs = nilfs_btree_node_dptrs(left, btree);
303         lnchildren = nilfs_btree_node_get_nchildren(left);
304
305         rdkeys = nilfs_btree_node_dkeys(right);
306         rdptrs = nilfs_btree_node_dptrs(right, btree);
307         rnchildren = nilfs_btree_node_get_nchildren(right);
308
309         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
310         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
311         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
312         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
313
314         lnchildren -= n;
315         rnchildren += n;
316         nilfs_btree_node_set_nchildren(left, lnchildren);
317         nilfs_btree_node_set_nchildren(right, rnchildren);
318 }
319
320 /* Assume that the buffer head corresponding to node is locked. */
321 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
322                                     struct nilfs_btree_node *node,
323                                     __u64 key, __u64 ptr, int index)
324 {
325         __le64 *dkeys;
326         __le64 *dptrs;
327         int nchildren;
328
329         dkeys = nilfs_btree_node_dkeys(node);
330         dptrs = nilfs_btree_node_dptrs(node, btree);
331         nchildren = nilfs_btree_node_get_nchildren(node);
332         if (index < nchildren) {
333                 memmove(dkeys + index + 1, dkeys + index,
334                         (nchildren - index) * sizeof(*dkeys));
335                 memmove(dptrs + index + 1, dptrs + index,
336                         (nchildren - index) * sizeof(*dptrs));
337         }
338         dkeys[index] = nilfs_bmap_key_to_dkey(key);
339         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
340         nchildren++;
341         nilfs_btree_node_set_nchildren(node, nchildren);
342 }
343
344 /* Assume that the buffer head corresponding to node is locked. */
345 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
346                                     struct nilfs_btree_node *node,
347                                     __u64 *keyp, __u64 *ptrp, int index)
348 {
349         __u64 key;
350         __u64 ptr;
351         __le64 *dkeys;
352         __le64 *dptrs;
353         int nchildren;
354
355         dkeys = nilfs_btree_node_dkeys(node);
356         dptrs = nilfs_btree_node_dptrs(node, btree);
357         key = nilfs_bmap_dkey_to_key(dkeys[index]);
358         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
359         nchildren = nilfs_btree_node_get_nchildren(node);
360         if (keyp != NULL)
361                 *keyp = key;
362         if (ptrp != NULL)
363                 *ptrp = ptr;
364
365         if (index < nchildren - 1) {
366                 memmove(dkeys + index, dkeys + index + 1,
367                         (nchildren - index - 1) * sizeof(*dkeys));
368                 memmove(dptrs + index, dptrs + index + 1,
369                         (nchildren - index - 1) * sizeof(*dptrs));
370         }
371         nchildren--;
372         nilfs_btree_node_set_nchildren(node, nchildren);
373 }
374
375 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
376                                    __u64 key, int *indexp)
377 {
378         __u64 nkey;
379         int index, low, high, s;
380
381         /* binary search */
382         low = 0;
383         high = nilfs_btree_node_get_nchildren(node) - 1;
384         index = 0;
385         s = 0;
386         while (low <= high) {
387                 index = (low + high) / 2;
388                 nkey = nilfs_btree_node_get_key(node, index);
389                 if (nkey == key) {
390                         s = 0;
391                         goto out;
392                 } else if (nkey < key) {
393                         low = index + 1;
394                         s = -1;
395                 } else {
396                         high = index - 1;
397                         s = 1;
398                 }
399         }
400
401         /* adjust index */
402         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
403                 if (s > 0 && index > 0)
404                         index--;
405         } else if (s < 0)
406                 index++;
407
408  out:
409         *indexp = index;
410
411         return s == 0;
412 }
413
414 static inline struct nilfs_btree_node *
415 nilfs_btree_get_root(const struct nilfs_btree *btree)
416 {
417         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
418 }
419
420 static inline struct nilfs_btree_node *
421 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
422 {
423         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
424 }
425
426 static inline struct nilfs_btree_node *
427 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
428 {
429         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
430 }
431
432 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
433 {
434         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
435 }
436
437 static inline struct nilfs_btree_node *
438 nilfs_btree_get_node(const struct nilfs_btree *btree,
439                      const struct nilfs_btree_path *path,
440                      int level)
441 {
442         return (level == nilfs_btree_height(btree) - 1) ?
443                 nilfs_btree_get_root(btree) :
444                 nilfs_btree_get_nonroot_node(path, level);
445 }
446
447 static inline int
448 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
449 {
450         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
451                 dump_stack();
452                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
453                        nilfs_btree_node_get_level(node), level);
454                 return 1;
455         }
456         return 0;
457 }
458
459 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
460                                  struct nilfs_btree_path *path,
461                                  __u64 key, __u64 *ptrp, int minlevel)
462 {
463         struct nilfs_btree_node *node;
464         __u64 ptr;
465         int level, index, found, ret;
466
467         node = nilfs_btree_get_root(btree);
468         level = nilfs_btree_node_get_level(node);
469         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
470                 return -ENOENT;
471
472         found = nilfs_btree_node_lookup(node, key, &index);
473         ptr = nilfs_btree_node_get_ptr(btree, node, index);
474         path[level].bp_bh = NULL;
475         path[level].bp_index = index;
476
477         for (level--; level >= minlevel; level--) {
478                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
479                 if (ret < 0)
480                         return ret;
481                 node = nilfs_btree_get_nonroot_node(path, level);
482                 if (nilfs_btree_bad_node(node, level))
483                         return -EINVAL;
484                 if (!found)
485                         found = nilfs_btree_node_lookup(node, key, &index);
486                 else
487                         index = 0;
488                 if (index < nilfs_btree_node_nchildren_max(node, btree))
489                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
490                 else {
491                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
492                         /* insert */
493                         ptr = NILFS_BMAP_INVALID_PTR;
494                 }
495                 path[level].bp_index = index;
496         }
497         if (!found)
498                 return -ENOENT;
499
500         if (ptrp != NULL)
501                 *ptrp = ptr;
502
503         return 0;
504 }
505
506 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
507                                       struct nilfs_btree_path *path,
508                                       __u64 *keyp, __u64 *ptrp)
509 {
510         struct nilfs_btree_node *node;
511         __u64 ptr;
512         int index, level, ret;
513
514         node = nilfs_btree_get_root(btree);
515         index = nilfs_btree_node_get_nchildren(node) - 1;
516         if (index < 0)
517                 return -ENOENT;
518         level = nilfs_btree_node_get_level(node);
519         ptr = nilfs_btree_node_get_ptr(btree, node, index);
520         path[level].bp_bh = NULL;
521         path[level].bp_index = index;
522
523         for (level--; level > 0; level--) {
524                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
525                 if (ret < 0)
526                         return ret;
527                 node = nilfs_btree_get_nonroot_node(path, level);
528                 if (nilfs_btree_bad_node(node, level))
529                         return -EINVAL;
530                 index = nilfs_btree_node_get_nchildren(node) - 1;
531                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
532                 path[level].bp_index = index;
533         }
534
535         if (keyp != NULL)
536                 *keyp = nilfs_btree_node_get_key(node, index);
537         if (ptrp != NULL)
538                 *ptrp = ptr;
539
540         return 0;
541 }
542
543 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
544                               __u64 key, int level, __u64 *ptrp)
545 {
546         struct nilfs_btree *btree;
547         struct nilfs_btree_path *path;
548         __u64 ptr;
549         int ret;
550
551         btree = (struct nilfs_btree *)bmap;
552         path = nilfs_btree_alloc_path();
553         if (path == NULL)
554                 return -ENOMEM;
555         nilfs_btree_init_path(path);
556
557         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
558
559         if (ptrp != NULL)
560                 *ptrp = ptr;
561
562         nilfs_btree_release_path(path);
563         nilfs_btree_free_path(path);
564
565         return ret;
566 }
567
568 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
569                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
570 {
571         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
572         struct nilfs_btree_path *path;
573         struct nilfs_btree_node *node;
574         struct inode *dat = NULL;
575         __u64 ptr, ptr2;
576         sector_t blocknr;
577         int level = NILFS_BTREE_LEVEL_NODE_MIN;
578         int ret, cnt, index, maxlevel;
579
580         path = nilfs_btree_alloc_path();
581         if (path == NULL)
582                 return -ENOMEM;
583         nilfs_btree_init_path(path);
584         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
585         if (ret < 0)
586                 goto out;
587
588         if (NILFS_BMAP_USE_VBN(bmap)) {
589                 dat = nilfs_bmap_get_dat(bmap);
590                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
591                 if (ret < 0)
592                         goto out;
593                 ptr = blocknr;
594         }
595         cnt = 1;
596         if (cnt == maxblocks)
597                 goto end;
598
599         maxlevel = nilfs_btree_height(btree) - 1;
600         node = nilfs_btree_get_node(btree, path, level);
601         index = path[level].bp_index + 1;
602         for (;;) {
603                 while (index < nilfs_btree_node_get_nchildren(node)) {
604                         if (nilfs_btree_node_get_key(node, index) !=
605                             key + cnt)
606                                 goto end;
607                         ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
608                         if (dat) {
609                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
610                                 if (ret < 0)
611                                         goto out;
612                                 ptr2 = blocknr;
613                         }
614                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
615                                 goto end;
616                         index++;
617                         continue;
618                 }
619                 if (level == maxlevel)
620                         break;
621
622                 /* look-up right sibling node */
623                 node = nilfs_btree_get_node(btree, path, level + 1);
624                 index = path[level + 1].bp_index + 1;
625                 if (index >= nilfs_btree_node_get_nchildren(node) ||
626                     nilfs_btree_node_get_key(node, index) != key + cnt)
627                         break;
628                 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
629                 path[level + 1].bp_index = index;
630
631                 brelse(path[level].bp_bh);
632                 path[level].bp_bh = NULL;
633                 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
634                 if (ret < 0)
635                         goto out;
636                 node = nilfs_btree_get_nonroot_node(path, level);
637                 index = 0;
638                 path[level].bp_index = index;
639         }
640  end:
641         *ptrp = ptr;
642         ret = cnt;
643  out:
644         nilfs_btree_release_path(path);
645         nilfs_btree_free_path(path);
646         return ret;
647 }
648
649 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
650                                     struct nilfs_btree_path *path,
651                                     int level, __u64 key)
652 {
653         if (level < nilfs_btree_height(btree) - 1) {
654                 do {
655                         nilfs_btree_node_set_key(
656                                 nilfs_btree_get_nonroot_node(path, level),
657                                 path[level].bp_index, key);
658                         if (!buffer_dirty(path[level].bp_bh))
659                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
660                 } while ((path[level].bp_index == 0) &&
661                          (++level < nilfs_btree_height(btree) - 1));
662         }
663
664         /* root */
665         if (level == nilfs_btree_height(btree) - 1) {
666                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
667                                          path[level].bp_index, key);
668         }
669 }
670
671 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
672                                   struct nilfs_btree_path *path,
673                                   int level, __u64 *keyp, __u64 *ptrp)
674 {
675         struct nilfs_btree_node *node;
676
677         if (level < nilfs_btree_height(btree) - 1) {
678                 node = nilfs_btree_get_nonroot_node(path, level);
679                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
680                                         path[level].bp_index);
681                 if (!buffer_dirty(path[level].bp_bh))
682                         nilfs_btnode_mark_dirty(path[level].bp_bh);
683
684                 if (path[level].bp_index == 0)
685                         nilfs_btree_promote_key(btree, path, level + 1,
686                                                 nilfs_btree_node_get_key(node,
687                                                                          0));
688         } else {
689                 node = nilfs_btree_get_root(btree);
690                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
691                                         path[level].bp_index);
692         }
693 }
694
695 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
696                                    struct nilfs_btree_path *path,
697                                    int level, __u64 *keyp, __u64 *ptrp)
698 {
699         struct nilfs_btree_node *node, *left;
700         int nchildren, lnchildren, n, move;
701
702         node = nilfs_btree_get_nonroot_node(path, level);
703         left = nilfs_btree_get_sib_node(path, level);
704         nchildren = nilfs_btree_node_get_nchildren(node);
705         lnchildren = nilfs_btree_node_get_nchildren(left);
706         move = 0;
707
708         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
709         if (n > path[level].bp_index) {
710                 /* move insert point */
711                 n--;
712                 move = 1;
713         }
714
715         nilfs_btree_node_move_left(btree, left, node, n);
716
717         if (!buffer_dirty(path[level].bp_bh))
718                 nilfs_btnode_mark_dirty(path[level].bp_bh);
719         if (!buffer_dirty(path[level].bp_sib_bh))
720                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
721
722         nilfs_btree_promote_key(btree, path, level + 1,
723                                 nilfs_btree_node_get_key(node, 0));
724
725         if (move) {
726                 brelse(path[level].bp_bh);
727                 path[level].bp_bh = path[level].bp_sib_bh;
728                 path[level].bp_sib_bh = NULL;
729                 path[level].bp_index += lnchildren;
730                 path[level + 1].bp_index--;
731         } else {
732                 brelse(path[level].bp_sib_bh);
733                 path[level].bp_sib_bh = NULL;
734                 path[level].bp_index -= n;
735         }
736
737         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
738 }
739
740 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
741                                     struct nilfs_btree_path *path,
742                                     int level, __u64 *keyp, __u64 *ptrp)
743 {
744         struct nilfs_btree_node *node, *right;
745         int nchildren, rnchildren, n, move;
746
747         node = nilfs_btree_get_nonroot_node(path, level);
748         right = nilfs_btree_get_sib_node(path, level);
749         nchildren = nilfs_btree_node_get_nchildren(node);
750         rnchildren = nilfs_btree_node_get_nchildren(right);
751         move = 0;
752
753         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
754         if (n > nchildren - path[level].bp_index) {
755                 /* move insert point */
756                 n--;
757                 move = 1;
758         }
759
760         nilfs_btree_node_move_right(btree, node, right, n);
761
762         if (!buffer_dirty(path[level].bp_bh))
763                 nilfs_btnode_mark_dirty(path[level].bp_bh);
764         if (!buffer_dirty(path[level].bp_sib_bh))
765                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
766
767         path[level + 1].bp_index++;
768         nilfs_btree_promote_key(btree, path, level + 1,
769                                 nilfs_btree_node_get_key(right, 0));
770         path[level + 1].bp_index--;
771
772         if (move) {
773                 brelse(path[level].bp_bh);
774                 path[level].bp_bh = path[level].bp_sib_bh;
775                 path[level].bp_sib_bh = NULL;
776                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
777                 path[level + 1].bp_index++;
778         } else {
779                 brelse(path[level].bp_sib_bh);
780                 path[level].bp_sib_bh = NULL;
781         }
782
783         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
784 }
785
786 static void nilfs_btree_split(struct nilfs_btree *btree,
787                               struct nilfs_btree_path *path,
788                               int level, __u64 *keyp, __u64 *ptrp)
789 {
790         struct nilfs_btree_node *node, *right;
791         __u64 newkey;
792         __u64 newptr;
793         int nchildren, n, move;
794
795         node = nilfs_btree_get_nonroot_node(path, level);
796         right = nilfs_btree_get_sib_node(path, level);
797         nchildren = nilfs_btree_node_get_nchildren(node);
798         move = 0;
799
800         n = (nchildren + 1) / 2;
801         if (n > nchildren - path[level].bp_index) {
802                 n--;
803                 move = 1;
804         }
805
806         nilfs_btree_node_move_right(btree, node, right, n);
807
808         if (!buffer_dirty(path[level].bp_bh))
809                 nilfs_btnode_mark_dirty(path[level].bp_bh);
810         if (!buffer_dirty(path[level].bp_sib_bh))
811                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
812
813         newkey = nilfs_btree_node_get_key(right, 0);
814         newptr = path[level].bp_newreq.bpr_ptr;
815
816         if (move) {
817                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
818                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
819                                         path[level].bp_index);
820
821                 *keyp = nilfs_btree_node_get_key(right, 0);
822                 *ptrp = path[level].bp_newreq.bpr_ptr;
823
824                 brelse(path[level].bp_bh);
825                 path[level].bp_bh = path[level].bp_sib_bh;
826                 path[level].bp_sib_bh = NULL;
827         } else {
828                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
829
830                 *keyp = nilfs_btree_node_get_key(right, 0);
831                 *ptrp = path[level].bp_newreq.bpr_ptr;
832
833                 brelse(path[level].bp_sib_bh);
834                 path[level].bp_sib_bh = NULL;
835         }
836
837         path[level + 1].bp_index++;
838 }
839
840 static void nilfs_btree_grow(struct nilfs_btree *btree,
841                              struct nilfs_btree_path *path,
842                              int level, __u64 *keyp, __u64 *ptrp)
843 {
844         struct nilfs_btree_node *root, *child;
845         int n;
846
847         root = nilfs_btree_get_root(btree);
848         child = nilfs_btree_get_sib_node(path, level);
849
850         n = nilfs_btree_node_get_nchildren(root);
851
852         nilfs_btree_node_move_right(btree, root, child, n);
853         nilfs_btree_node_set_level(root, level + 1);
854
855         if (!buffer_dirty(path[level].bp_sib_bh))
856                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
857
858         path[level].bp_bh = path[level].bp_sib_bh;
859         path[level].bp_sib_bh = NULL;
860
861         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
862
863         *keyp = nilfs_btree_node_get_key(child, 0);
864         *ptrp = path[level].bp_newreq.bpr_ptr;
865 }
866
867 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
868                                    const struct nilfs_btree_path *path)
869 {
870         struct nilfs_btree_node *node;
871         int level;
872
873         if (path == NULL)
874                 return NILFS_BMAP_INVALID_PTR;
875
876         /* left sibling */
877         level = NILFS_BTREE_LEVEL_NODE_MIN;
878         if (path[level].bp_index > 0) {
879                 node = nilfs_btree_get_node(btree, path, level);
880                 return nilfs_btree_node_get_ptr(btree, node,
881                                                 path[level].bp_index - 1);
882         }
883
884         /* parent */
885         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
886         if (level <= nilfs_btree_height(btree) - 1) {
887                 node = nilfs_btree_get_node(btree, path, level);
888                 return nilfs_btree_node_get_ptr(btree, node,
889                                                 path[level].bp_index);
890         }
891
892         return NILFS_BMAP_INVALID_PTR;
893 }
894
895 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
896                                        const struct nilfs_btree_path *path,
897                                        __u64 key)
898 {
899         __u64 ptr;
900
901         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
902         if (ptr != NILFS_BMAP_INVALID_PTR)
903                 /* sequential access */
904                 return ptr;
905         else {
906                 ptr = nilfs_btree_find_near(btree, path);
907                 if (ptr != NILFS_BMAP_INVALID_PTR)
908                         /* near */
909                         return ptr;
910         }
911         /* block group */
912         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
913 }
914
915 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
916                                      __u64 ptr)
917 {
918         btree->bt_bmap.b_last_allocated_key = key;
919         btree->bt_bmap.b_last_allocated_ptr = ptr;
920 }
921
922 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
923                                       struct nilfs_btree_path *path,
924                                       int *levelp, __u64 key, __u64 ptr,
925                                       struct nilfs_bmap_stats *stats)
926 {
927         struct buffer_head *bh;
928         struct nilfs_btree_node *node, *parent, *sib;
929         __u64 sibptr;
930         int pindex, level, ret;
931         struct inode *dat = NULL;
932
933         stats->bs_nblocks = 0;
934         level = NILFS_BTREE_LEVEL_DATA;
935
936         /* allocate a new ptr for data block */
937         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
938                 path[level].bp_newreq.bpr_ptr =
939                         nilfs_btree_find_target_v(btree, path, key);
940                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
941         }
942
943         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
944                                            &path[level].bp_newreq, dat);
945         if (ret < 0)
946                 goto err_out_data;
947
948         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
949              level < nilfs_btree_height(btree) - 1;
950              level++) {
951                 node = nilfs_btree_get_nonroot_node(path, level);
952                 if (nilfs_btree_node_get_nchildren(node) <
953                     nilfs_btree_node_nchildren_max(node, btree)) {
954                         path[level].bp_op = nilfs_btree_do_insert;
955                         stats->bs_nblocks++;
956                         goto out;
957                 }
958
959                 parent = nilfs_btree_get_node(btree, path, level + 1);
960                 pindex = path[level + 1].bp_index;
961
962                 /* left sibling */
963                 if (pindex > 0) {
964                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
965                                                           pindex - 1);
966                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
967                         if (ret < 0)
968                                 goto err_out_child_node;
969                         sib = (struct nilfs_btree_node *)bh->b_data;
970                         if (nilfs_btree_node_get_nchildren(sib) <
971                             nilfs_btree_node_nchildren_max(sib, btree)) {
972                                 path[level].bp_sib_bh = bh;
973                                 path[level].bp_op = nilfs_btree_carry_left;
974                                 stats->bs_nblocks++;
975                                 goto out;
976                         } else
977                                 brelse(bh);
978                 }
979
980                 /* right sibling */
981                 if (pindex <
982                     nilfs_btree_node_get_nchildren(parent) - 1) {
983                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
984                                                           pindex + 1);
985                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
986                         if (ret < 0)
987                                 goto err_out_child_node;
988                         sib = (struct nilfs_btree_node *)bh->b_data;
989                         if (nilfs_btree_node_get_nchildren(sib) <
990                             nilfs_btree_node_nchildren_max(sib, btree)) {
991                                 path[level].bp_sib_bh = bh;
992                                 path[level].bp_op = nilfs_btree_carry_right;
993                                 stats->bs_nblocks++;
994                                 goto out;
995                         } else
996                                 brelse(bh);
997                 }
998
999                 /* split */
1000                 path[level].bp_newreq.bpr_ptr =
1001                         path[level - 1].bp_newreq.bpr_ptr + 1;
1002                 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1003                                                    &path[level].bp_newreq, dat);
1004                 if (ret < 0)
1005                         goto err_out_child_node;
1006                 ret = nilfs_btree_get_new_block(btree,
1007                                                 path[level].bp_newreq.bpr_ptr,
1008                                                 &bh);
1009                 if (ret < 0)
1010                         goto err_out_curr_node;
1011
1012                 stats->bs_nblocks++;
1013
1014                 nilfs_btree_node_init(btree,
1015                                       (struct nilfs_btree_node *)bh->b_data,
1016                                       0, level, 0, NULL, NULL);
1017                 path[level].bp_sib_bh = bh;
1018                 path[level].bp_op = nilfs_btree_split;
1019         }
1020
1021         /* root */
1022         node = nilfs_btree_get_root(btree);
1023         if (nilfs_btree_node_get_nchildren(node) <
1024             nilfs_btree_node_nchildren_max(node, btree)) {
1025                 path[level].bp_op = nilfs_btree_do_insert;
1026                 stats->bs_nblocks++;
1027                 goto out;
1028         }
1029
1030         /* grow */
1031         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1032         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1033                                            &path[level].bp_newreq, dat);
1034         if (ret < 0)
1035                 goto err_out_child_node;
1036         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1037                                         &bh);
1038         if (ret < 0)
1039                 goto err_out_curr_node;
1040
1041         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1042                               0, level, 0, NULL, NULL);
1043         path[level].bp_sib_bh = bh;
1044         path[level].bp_op = nilfs_btree_grow;
1045
1046         level++;
1047         path[level].bp_op = nilfs_btree_do_insert;
1048
1049         /* a newly-created node block and a data block are added */
1050         stats->bs_nblocks += 2;
1051
1052         /* success */
1053  out:
1054         *levelp = level;
1055         return ret;
1056
1057         /* error */
1058  err_out_curr_node:
1059         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1060                                    dat);
1061  err_out_child_node:
1062         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1063                 nilfs_btnode_delete(path[level].bp_sib_bh);
1064                 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1065                                            &path[level].bp_newreq, dat);
1066
1067         }
1068
1069         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1070                                    dat);
1071  err_out_data:
1072         *levelp = level;
1073         stats->bs_nblocks = 0;
1074         return ret;
1075 }
1076
1077 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1078                                       struct nilfs_btree_path *path,
1079                                       int maxlevel, __u64 key, __u64 ptr)
1080 {
1081         struct inode *dat = NULL;
1082         int level;
1083
1084         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1085         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1086         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1087                 nilfs_btree_set_target_v(btree, key, ptr);
1088                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1089         }
1090
1091         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1092                 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1093                                             &path[level - 1].bp_newreq, dat);
1094                 path[level].bp_op(btree, path, level, &key, &ptr);
1095         }
1096
1097         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1098                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1099 }
1100
1101 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1102 {
1103         struct nilfs_btree *btree;
1104         struct nilfs_btree_path *path;
1105         struct nilfs_bmap_stats stats;
1106         int level, ret;
1107
1108         btree = (struct nilfs_btree *)bmap;
1109         path = nilfs_btree_alloc_path();
1110         if (path == NULL)
1111                 return -ENOMEM;
1112         nilfs_btree_init_path(path);
1113
1114         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1115                                     NILFS_BTREE_LEVEL_NODE_MIN);
1116         if (ret != -ENOENT) {
1117                 if (ret == 0)
1118                         ret = -EEXIST;
1119                 goto out;
1120         }
1121
1122         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1123         if (ret < 0)
1124                 goto out;
1125         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1126         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1127
1128  out:
1129         nilfs_btree_release_path(path);
1130         nilfs_btree_free_path(path);
1131         return ret;
1132 }
1133
1134 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1135                                   struct nilfs_btree_path *path,
1136                                   int level, __u64 *keyp, __u64 *ptrp)
1137 {
1138         struct nilfs_btree_node *node;
1139
1140         if (level < nilfs_btree_height(btree) - 1) {
1141                 node = nilfs_btree_get_nonroot_node(path, level);
1142                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1143                                         path[level].bp_index);
1144                 if (!buffer_dirty(path[level].bp_bh))
1145                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1146                 if (path[level].bp_index == 0)
1147                         nilfs_btree_promote_key(btree, path, level + 1,
1148                                 nilfs_btree_node_get_key(node, 0));
1149         } else {
1150                 node = nilfs_btree_get_root(btree);
1151                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1152                                         path[level].bp_index);
1153         }
1154 }
1155
1156 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1157                                     struct nilfs_btree_path *path,
1158                                     int level, __u64 *keyp, __u64 *ptrp)
1159 {
1160         struct nilfs_btree_node *node, *left;
1161         int nchildren, lnchildren, n;
1162
1163         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1164
1165         node = nilfs_btree_get_nonroot_node(path, level);
1166         left = nilfs_btree_get_sib_node(path, level);
1167         nchildren = nilfs_btree_node_get_nchildren(node);
1168         lnchildren = nilfs_btree_node_get_nchildren(left);
1169
1170         n = (nchildren + lnchildren) / 2 - nchildren;
1171
1172         nilfs_btree_node_move_right(btree, left, node, n);
1173
1174         if (!buffer_dirty(path[level].bp_bh))
1175                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1176         if (!buffer_dirty(path[level].bp_sib_bh))
1177                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1178
1179         nilfs_btree_promote_key(btree, path, level + 1,
1180                                 nilfs_btree_node_get_key(node, 0));
1181
1182         brelse(path[level].bp_sib_bh);
1183         path[level].bp_sib_bh = NULL;
1184         path[level].bp_index += n;
1185 }
1186
1187 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1188                                      struct nilfs_btree_path *path,
1189                                      int level, __u64 *keyp, __u64 *ptrp)
1190 {
1191         struct nilfs_btree_node *node, *right;
1192         int nchildren, rnchildren, n;
1193
1194         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1195
1196         node = nilfs_btree_get_nonroot_node(path, level);
1197         right = nilfs_btree_get_sib_node(path, level);
1198         nchildren = nilfs_btree_node_get_nchildren(node);
1199         rnchildren = nilfs_btree_node_get_nchildren(right);
1200
1201         n = (nchildren + rnchildren) / 2 - nchildren;
1202
1203         nilfs_btree_node_move_left(btree, node, right, n);
1204
1205         if (!buffer_dirty(path[level].bp_bh))
1206                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1207         if (!buffer_dirty(path[level].bp_sib_bh))
1208                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1209
1210         path[level + 1].bp_index++;
1211         nilfs_btree_promote_key(btree, path, level + 1,
1212                                 nilfs_btree_node_get_key(right, 0));
1213         path[level + 1].bp_index--;
1214
1215         brelse(path[level].bp_sib_bh);
1216         path[level].bp_sib_bh = NULL;
1217 }
1218
1219 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1220                                     struct nilfs_btree_path *path,
1221                                     int level, __u64 *keyp, __u64 *ptrp)
1222 {
1223         struct nilfs_btree_node *node, *left;
1224         int n;
1225
1226         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1227
1228         node = nilfs_btree_get_nonroot_node(path, level);
1229         left = nilfs_btree_get_sib_node(path, level);
1230
1231         n = nilfs_btree_node_get_nchildren(node);
1232
1233         nilfs_btree_node_move_left(btree, left, node, n);
1234
1235         if (!buffer_dirty(path[level].bp_sib_bh))
1236                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1237
1238         nilfs_btnode_delete(path[level].bp_bh);
1239         path[level].bp_bh = path[level].bp_sib_bh;
1240         path[level].bp_sib_bh = NULL;
1241         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1242 }
1243
1244 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1245                                      struct nilfs_btree_path *path,
1246                                      int level, __u64 *keyp, __u64 *ptrp)
1247 {
1248         struct nilfs_btree_node *node, *right;
1249         int n;
1250
1251         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1252
1253         node = nilfs_btree_get_nonroot_node(path, level);
1254         right = nilfs_btree_get_sib_node(path, level);
1255
1256         n = nilfs_btree_node_get_nchildren(right);
1257
1258         nilfs_btree_node_move_left(btree, node, right, n);
1259
1260         if (!buffer_dirty(path[level].bp_bh))
1261                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1262
1263         nilfs_btnode_delete(path[level].bp_sib_bh);
1264         path[level].bp_sib_bh = NULL;
1265         path[level + 1].bp_index++;
1266 }
1267
1268 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1269                                struct nilfs_btree_path *path,
1270                                int level, __u64 *keyp, __u64 *ptrp)
1271 {
1272         struct nilfs_btree_node *root, *child;
1273         int n;
1274
1275         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1276
1277         root = nilfs_btree_get_root(btree);
1278         child = nilfs_btree_get_nonroot_node(path, level);
1279
1280         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1281         nilfs_btree_node_set_level(root, level);
1282         n = nilfs_btree_node_get_nchildren(child);
1283         nilfs_btree_node_move_left(btree, root, child, n);
1284
1285         nilfs_btnode_delete(path[level].bp_bh);
1286         path[level].bp_bh = NULL;
1287 }
1288
1289
1290 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1291                                       struct nilfs_btree_path *path,
1292                                       int *levelp,
1293                                       struct nilfs_bmap_stats *stats,
1294                                       struct inode *dat)
1295 {
1296         struct buffer_head *bh;
1297         struct nilfs_btree_node *node, *parent, *sib;
1298         __u64 sibptr;
1299         int pindex, level, ret;
1300
1301         ret = 0;
1302         stats->bs_nblocks = 0;
1303         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1304              level < nilfs_btree_height(btree) - 1;
1305              level++) {
1306                 node = nilfs_btree_get_nonroot_node(path, level);
1307                 path[level].bp_oldreq.bpr_ptr =
1308                         nilfs_btree_node_get_ptr(btree, node,
1309                                                  path[level].bp_index);
1310                 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1311                                                  &path[level].bp_oldreq, dat);
1312                 if (ret < 0)
1313                         goto err_out_child_node;
1314
1315                 if (nilfs_btree_node_get_nchildren(node) >
1316                     nilfs_btree_node_nchildren_min(node, btree)) {
1317                         path[level].bp_op = nilfs_btree_do_delete;
1318                         stats->bs_nblocks++;
1319                         goto out;
1320                 }
1321
1322                 parent = nilfs_btree_get_node(btree, path, level + 1);
1323                 pindex = path[level + 1].bp_index;
1324
1325                 if (pindex > 0) {
1326                         /* left sibling */
1327                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1328                                                           pindex - 1);
1329                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1330                         if (ret < 0)
1331                                 goto err_out_curr_node;
1332                         sib = (struct nilfs_btree_node *)bh->b_data;
1333                         if (nilfs_btree_node_get_nchildren(sib) >
1334                             nilfs_btree_node_nchildren_min(sib, btree)) {
1335                                 path[level].bp_sib_bh = bh;
1336                                 path[level].bp_op = nilfs_btree_borrow_left;
1337                                 stats->bs_nblocks++;
1338                                 goto out;
1339                         } else {
1340                                 path[level].bp_sib_bh = bh;
1341                                 path[level].bp_op = nilfs_btree_concat_left;
1342                                 stats->bs_nblocks++;
1343                                 /* continue; */
1344                         }
1345                 } else if (pindex <
1346                            nilfs_btree_node_get_nchildren(parent) - 1) {
1347                         /* right sibling */
1348                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1349                                                           pindex + 1);
1350                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1351                         if (ret < 0)
1352                                 goto err_out_curr_node;
1353                         sib = (struct nilfs_btree_node *)bh->b_data;
1354                         if (nilfs_btree_node_get_nchildren(sib) >
1355                             nilfs_btree_node_nchildren_min(sib, btree)) {
1356                                 path[level].bp_sib_bh = bh;
1357                                 path[level].bp_op = nilfs_btree_borrow_right;
1358                                 stats->bs_nblocks++;
1359                                 goto out;
1360                         } else {
1361                                 path[level].bp_sib_bh = bh;
1362                                 path[level].bp_op = nilfs_btree_concat_right;
1363                                 stats->bs_nblocks++;
1364                                 /* continue; */
1365                         }
1366                 } else {
1367                         /* no siblings */
1368                         /* the only child of the root node */
1369                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1370                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1371                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1372                                 path[level].bp_op = nilfs_btree_shrink;
1373                                 stats->bs_nblocks += 2;
1374                         } else {
1375                                 path[level].bp_op = nilfs_btree_do_delete;
1376                                 stats->bs_nblocks++;
1377                         }
1378
1379                         goto out;
1380
1381                 }
1382         }
1383
1384         node = nilfs_btree_get_root(btree);
1385         path[level].bp_oldreq.bpr_ptr =
1386                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1387
1388         ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1389                                          &path[level].bp_oldreq, dat);
1390         if (ret < 0)
1391                 goto err_out_child_node;
1392
1393         /* child of the root node is deleted */
1394         path[level].bp_op = nilfs_btree_do_delete;
1395         stats->bs_nblocks++;
1396
1397         /* success */
1398  out:
1399         *levelp = level;
1400         return ret;
1401
1402         /* error */
1403  err_out_curr_node:
1404         nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1405  err_out_child_node:
1406         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1407                 brelse(path[level].bp_sib_bh);
1408                 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1409                                          &path[level].bp_oldreq, dat);
1410         }
1411         *levelp = level;
1412         stats->bs_nblocks = 0;
1413         return ret;
1414 }
1415
1416 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1417                                       struct nilfs_btree_path *path,
1418                                       int maxlevel, struct inode *dat)
1419 {
1420         int level;
1421
1422         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1423                 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1424                                           &path[level].bp_oldreq, dat);
1425                 path[level].bp_op(btree, path, level, NULL, NULL);
1426         }
1427
1428         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1429                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1430 }
1431
1432 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1433
1434 {
1435         struct nilfs_btree *btree;
1436         struct nilfs_btree_path *path;
1437         struct nilfs_bmap_stats stats;
1438         struct inode *dat;
1439         int level, ret;
1440
1441         btree = (struct nilfs_btree *)bmap;
1442         path = nilfs_btree_alloc_path();
1443         if (path == NULL)
1444                 return -ENOMEM;
1445         nilfs_btree_init_path(path);
1446         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1447                                     NILFS_BTREE_LEVEL_NODE_MIN);
1448         if (ret < 0)
1449                 goto out;
1450
1451
1452         dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1453                 nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1454
1455         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1456         if (ret < 0)
1457                 goto out;
1458         nilfs_btree_commit_delete(btree, path, level, dat);
1459         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1460
1461 out:
1462         nilfs_btree_release_path(path);
1463         nilfs_btree_free_path(path);
1464         return ret;
1465 }
1466
1467 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1468 {
1469         struct nilfs_btree *btree;
1470         struct nilfs_btree_path *path;
1471         int ret;
1472
1473         btree = (struct nilfs_btree *)bmap;
1474         path = nilfs_btree_alloc_path();
1475         if (path == NULL)
1476                 return -ENOMEM;
1477         nilfs_btree_init_path(path);
1478
1479         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1480
1481         nilfs_btree_release_path(path);
1482         nilfs_btree_free_path(path);
1483
1484         return ret;
1485 }
1486
1487 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1488 {
1489         struct buffer_head *bh;
1490         struct nilfs_btree *btree;
1491         struct nilfs_btree_node *root, *node;
1492         __u64 maxkey, nextmaxkey;
1493         __u64 ptr;
1494         int nchildren, ret;
1495
1496         btree = (struct nilfs_btree *)bmap;
1497         root = nilfs_btree_get_root(btree);
1498         switch (nilfs_btree_height(btree)) {
1499         case 2:
1500                 bh = NULL;
1501                 node = root;
1502                 break;
1503         case 3:
1504                 nchildren = nilfs_btree_node_get_nchildren(root);
1505                 if (nchildren > 1)
1506                         return 0;
1507                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1508                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1509                 if (ret < 0)
1510                         return ret;
1511                 node = (struct nilfs_btree_node *)bh->b_data;
1512                 break;
1513         default:
1514                 return 0;
1515         }
1516
1517         nchildren = nilfs_btree_node_get_nchildren(node);
1518         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1519         nextmaxkey = (nchildren > 1) ?
1520                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1521         if (bh != NULL)
1522                 brelse(bh);
1523
1524         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1525 }
1526
1527 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1528                                    __u64 *keys, __u64 *ptrs, int nitems)
1529 {
1530         struct buffer_head *bh;
1531         struct nilfs_btree *btree;
1532         struct nilfs_btree_node *node, *root;
1533         __le64 *dkeys;
1534         __le64 *dptrs;
1535         __u64 ptr;
1536         int nchildren, i, ret;
1537
1538         btree = (struct nilfs_btree *)bmap;
1539         root = nilfs_btree_get_root(btree);
1540         switch (nilfs_btree_height(btree)) {
1541         case 2:
1542                 bh = NULL;
1543                 node = root;
1544                 break;
1545         case 3:
1546                 nchildren = nilfs_btree_node_get_nchildren(root);
1547                 WARN_ON(nchildren > 1);
1548                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1549                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1550                 if (ret < 0)
1551                         return ret;
1552                 node = (struct nilfs_btree_node *)bh->b_data;
1553                 break;
1554         default:
1555                 node = NULL;
1556                 return -EINVAL;
1557         }
1558
1559         nchildren = nilfs_btree_node_get_nchildren(node);
1560         if (nchildren < nitems)
1561                 nitems = nchildren;
1562         dkeys = nilfs_btree_node_dkeys(node);
1563         dptrs = nilfs_btree_node_dptrs(node, btree);
1564         for (i = 0; i < nitems; i++) {
1565                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1566                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1567         }
1568
1569         if (bh != NULL)
1570                 brelse(bh);
1571
1572         return nitems;
1573 }
1574
1575 static int
1576 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1577                                        union nilfs_bmap_ptr_req *dreq,
1578                                        union nilfs_bmap_ptr_req *nreq,
1579                                        struct buffer_head **bhp,
1580                                        struct nilfs_bmap_stats *stats)
1581 {
1582         struct buffer_head *bh;
1583         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1584         struct inode *dat = NULL;
1585         int ret;
1586
1587         stats->bs_nblocks = 0;
1588
1589         /* for data */
1590         /* cannot find near ptr */
1591         if (NILFS_BMAP_USE_VBN(bmap)) {
1592                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1593                 dat = nilfs_bmap_get_dat(bmap);
1594         }
1595
1596         ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1597         if (ret < 0)
1598                 return ret;
1599
1600         *bhp = NULL;
1601         stats->bs_nblocks++;
1602         if (nreq != NULL) {
1603                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1604                 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1605                 if (ret < 0)
1606                         goto err_out_dreq;
1607
1608                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1609                 if (ret < 0)
1610                         goto err_out_nreq;
1611
1612                 *bhp = bh;
1613                 stats->bs_nblocks++;
1614         }
1615
1616         /* success */
1617         return 0;
1618
1619         /* error */
1620  err_out_nreq:
1621         nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1622  err_out_dreq:
1623         nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1624         stats->bs_nblocks = 0;
1625         return ret;
1626
1627 }
1628
1629 static void
1630 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1631                                       __u64 key, __u64 ptr,
1632                                       const __u64 *keys, const __u64 *ptrs,
1633                                       int n,
1634                                       union nilfs_bmap_ptr_req *dreq,
1635                                       union nilfs_bmap_ptr_req *nreq,
1636                                       struct buffer_head *bh)
1637 {
1638         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1639         struct nilfs_btree_node *node;
1640         struct inode *dat;
1641         __u64 tmpptr;
1642
1643         /* free resources */
1644         if (bmap->b_ops->bop_clear != NULL)
1645                 bmap->b_ops->bop_clear(bmap);
1646
1647         /* ptr must be a pointer to a buffer head. */
1648         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1649
1650         /* convert and insert */
1651         dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1652         nilfs_btree_init(bmap);
1653         if (nreq != NULL) {
1654                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1655                 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1656
1657                 /* create child node at level 1 */
1658                 node = (struct nilfs_btree_node *)bh->b_data;
1659                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1660                 nilfs_btree_node_insert(btree, node,
1661                                         key, dreq->bpr_ptr, n);
1662                 if (!buffer_dirty(bh))
1663                         nilfs_btnode_mark_dirty(bh);
1664                 if (!nilfs_bmap_dirty(bmap))
1665                         nilfs_bmap_set_dirty(bmap);
1666
1667                 brelse(bh);
1668
1669                 /* create root node at level 2 */
1670                 node = nilfs_btree_get_root(btree);
1671                 tmpptr = nreq->bpr_ptr;
1672                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1673                                       2, 1, &keys[0], &tmpptr);
1674         } else {
1675                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1676
1677                 /* create root node at level 1 */
1678                 node = nilfs_btree_get_root(btree);
1679                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1680                                       1, n, keys, ptrs);
1681                 nilfs_btree_node_insert(btree, node,
1682                                         key, dreq->bpr_ptr, n);
1683                 if (!nilfs_bmap_dirty(bmap))
1684                         nilfs_bmap_set_dirty(bmap);
1685         }
1686
1687         if (NILFS_BMAP_USE_VBN(bmap))
1688                 nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1689 }
1690
1691 /**
1692  * nilfs_btree_convert_and_insert -
1693  * @bmap:
1694  * @key:
1695  * @ptr:
1696  * @keys:
1697  * @ptrs:
1698  * @n:
1699  */
1700 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1701                                    __u64 key, __u64 ptr,
1702                                    const __u64 *keys, const __u64 *ptrs, int n)
1703 {
1704         struct buffer_head *bh;
1705         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1706         struct nilfs_bmap_stats stats;
1707         int ret;
1708
1709         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1710                 di = &dreq;
1711                 ni = NULL;
1712         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1713                            1 << bmap->b_inode->i_blkbits)) {
1714                 di = &dreq;
1715                 ni = &nreq;
1716         } else {
1717                 di = NULL;
1718                 ni = NULL;
1719                 BUG();
1720         }
1721
1722         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1723                                                      &stats);
1724         if (ret < 0)
1725                 return ret;
1726         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1727                                               di, ni, bh);
1728         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1729         return 0;
1730 }
1731
1732 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1733                                    struct nilfs_btree_path *path,
1734                                    int level,
1735                                    struct buffer_head *bh)
1736 {
1737         while ((++level < nilfs_btree_height(btree) - 1) &&
1738                !buffer_dirty(path[level].bp_bh))
1739                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1740
1741         return 0;
1742 }
1743
1744 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1745                                         struct nilfs_btree_path *path,
1746                                         int level, struct inode *dat)
1747 {
1748         struct nilfs_btree_node *parent;
1749         int ret;
1750
1751         parent = nilfs_btree_get_node(btree, path, level + 1);
1752         path[level].bp_oldreq.bpr_ptr =
1753                 nilfs_btree_node_get_ptr(btree, parent,
1754                                          path[level + 1].bp_index);
1755         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1756         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1757                                        &path[level].bp_newreq.bpr_req);
1758         if (ret < 0)
1759                 return ret;
1760
1761         if (buffer_nilfs_node(path[level].bp_bh)) {
1762                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1763                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1764                 path[level].bp_ctxt.bh = path[level].bp_bh;
1765                 ret = nilfs_btnode_prepare_change_key(
1766                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1767                         &path[level].bp_ctxt);
1768                 if (ret < 0) {
1769                         nilfs_dat_abort_update(dat,
1770                                                &path[level].bp_oldreq.bpr_req,
1771                                                &path[level].bp_newreq.bpr_req);
1772                         return ret;
1773                 }
1774         }
1775
1776         return 0;
1777 }
1778
1779 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1780                                         struct nilfs_btree_path *path,
1781                                         int level, struct inode *dat)
1782 {
1783         struct nilfs_btree_node *parent;
1784
1785         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1786                                 &path[level].bp_newreq.bpr_req,
1787                                 btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1788
1789         if (buffer_nilfs_node(path[level].bp_bh)) {
1790                 nilfs_btnode_commit_change_key(
1791                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1792                         &path[level].bp_ctxt);
1793                 path[level].bp_bh = path[level].bp_ctxt.bh;
1794         }
1795         set_buffer_nilfs_volatile(path[level].bp_bh);
1796
1797         parent = nilfs_btree_get_node(btree, path, level + 1);
1798         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1799                                  path[level].bp_newreq.bpr_ptr);
1800 }
1801
1802 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1803                                        struct nilfs_btree_path *path,
1804                                        int level, struct inode *dat)
1805 {
1806         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1807                                &path[level].bp_newreq.bpr_req);
1808         if (buffer_nilfs_node(path[level].bp_bh))
1809                 nilfs_btnode_abort_change_key(
1810                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1811                         &path[level].bp_ctxt);
1812 }
1813
1814 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1815                                            struct nilfs_btree_path *path,
1816                                            int minlevel, int *maxlevelp,
1817                                            struct inode *dat)
1818 {
1819         int level, ret;
1820
1821         level = minlevel;
1822         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1823                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1824                 if (ret < 0)
1825                         return ret;
1826         }
1827         while ((++level < nilfs_btree_height(btree) - 1) &&
1828                !buffer_dirty(path[level].bp_bh)) {
1829
1830                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1831                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1832                 if (ret < 0)
1833                         goto out;
1834         }
1835
1836         /* success */
1837         *maxlevelp = level - 1;
1838         return 0;
1839
1840         /* error */
1841  out:
1842         while (--level > minlevel)
1843                 nilfs_btree_abort_update_v(btree, path, level, dat);
1844         if (!buffer_nilfs_volatile(path[level].bp_bh))
1845                 nilfs_btree_abort_update_v(btree, path, level, dat);
1846         return ret;
1847 }
1848
1849 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1850                                            struct nilfs_btree_path *path,
1851                                            int minlevel, int maxlevel,
1852                                            struct buffer_head *bh,
1853                                            struct inode *dat)
1854 {
1855         int level;
1856
1857         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1858                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1859
1860         for (level = minlevel + 1; level <= maxlevel; level++)
1861                 nilfs_btree_commit_update_v(btree, path, level, dat);
1862 }
1863
1864 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1865                                    struct nilfs_btree_path *path,
1866                                    int level, struct buffer_head *bh)
1867 {
1868         int maxlevel, ret;
1869         struct nilfs_btree_node *parent;
1870         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1871         __u64 ptr;
1872
1873         get_bh(bh);
1874         path[level].bp_bh = bh;
1875         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1876                                               dat);
1877         if (ret < 0)
1878                 goto out;
1879
1880         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1881                 parent = nilfs_btree_get_node(btree, path, level + 1);
1882                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1883                                                path[level + 1].bp_index);
1884                 ret = nilfs_dat_mark_dirty(dat, ptr);
1885                 if (ret < 0)
1886                         goto out;
1887         }
1888
1889         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1890
1891  out:
1892         brelse(path[level].bp_bh);
1893         path[level].bp_bh = NULL;
1894         return ret;
1895 }
1896
1897 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1898                                  struct buffer_head *bh)
1899 {
1900         struct nilfs_btree *btree;
1901         struct nilfs_btree_path *path;
1902         struct nilfs_btree_node *node;
1903         __u64 key;
1904         int level, ret;
1905
1906         WARN_ON(!buffer_dirty(bh));
1907
1908         btree = (struct nilfs_btree *)bmap;
1909         path = nilfs_btree_alloc_path();
1910         if (path == NULL)
1911                 return -ENOMEM;
1912         nilfs_btree_init_path(path);
1913
1914         if (buffer_nilfs_node(bh)) {
1915                 node = (struct nilfs_btree_node *)bh->b_data;
1916                 key = nilfs_btree_node_get_key(node, 0);
1917                 level = nilfs_btree_node_get_level(node);
1918         } else {
1919                 key = nilfs_bmap_data_get_key(bmap, bh);
1920                 level = NILFS_BTREE_LEVEL_DATA;
1921         }
1922
1923         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1924         if (ret < 0) {
1925                 if (unlikely(ret == -ENOENT))
1926                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1927                                __func__, (unsigned long long)key, level);
1928                 goto out;
1929         }
1930
1931         ret = NILFS_BMAP_USE_VBN(bmap) ?
1932                 nilfs_btree_propagate_v(btree, path, level, bh) :
1933                 nilfs_btree_propagate_p(btree, path, level, bh);
1934
1935  out:
1936         nilfs_btree_release_path(path);
1937         nilfs_btree_free_path(path);
1938
1939         return ret;
1940 }
1941
1942 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1943                                     struct buffer_head *bh)
1944 {
1945         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
1946 }
1947
1948 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1949                                          struct list_head *lists,
1950                                          struct buffer_head *bh)
1951 {
1952         struct list_head *head;
1953         struct buffer_head *cbh;
1954         struct nilfs_btree_node *node, *cnode;
1955         __u64 key, ckey;
1956         int level;
1957
1958         get_bh(bh);
1959         node = (struct nilfs_btree_node *)bh->b_data;
1960         key = nilfs_btree_node_get_key(node, 0);
1961         level = nilfs_btree_node_get_level(node);
1962         list_for_each(head, &lists[level]) {
1963                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1964                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1965                 ckey = nilfs_btree_node_get_key(cnode, 0);
1966                 if (key < ckey)
1967                         break;
1968         }
1969         list_add_tail(&bh->b_assoc_buffers, head);
1970 }
1971
1972 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1973                                              struct list_head *listp)
1974 {
1975         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1976         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1977         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1978         struct pagevec pvec;
1979         struct buffer_head *bh, *head;
1980         pgoff_t index = 0;
1981         int level, i;
1982
1983         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1984              level < NILFS_BTREE_LEVEL_MAX;
1985              level++)
1986                 INIT_LIST_HEAD(&lists[level]);
1987
1988         pagevec_init(&pvec, 0);
1989
1990         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1991                                   PAGEVEC_SIZE)) {
1992                 for (i = 0; i < pagevec_count(&pvec); i++) {
1993                         bh = head = page_buffers(pvec.pages[i]);
1994                         do {
1995                                 if (buffer_dirty(bh))
1996                                         nilfs_btree_add_dirty_buffer(btree,
1997                                                                      lists, bh);
1998                         } while ((bh = bh->b_this_page) != head);
1999                 }
2000                 pagevec_release(&pvec);
2001                 cond_resched();
2002         }
2003
2004         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2005              level < NILFS_BTREE_LEVEL_MAX;
2006              level++)
2007                 list_splice(&lists[level], listp->prev);
2008 }
2009
2010 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2011                                 struct nilfs_btree_path *path,
2012                                 int level,
2013                                 struct buffer_head **bh,
2014                                 sector_t blocknr,
2015                                 union nilfs_binfo *binfo)
2016 {
2017         struct nilfs_btree_node *parent;
2018         __u64 key;
2019         __u64 ptr;
2020         int ret;
2021
2022         parent = nilfs_btree_get_node(btree, path, level + 1);
2023         ptr = nilfs_btree_node_get_ptr(btree, parent,
2024                                        path[level + 1].bp_index);
2025         if (buffer_nilfs_node(*bh)) {
2026                 path[level].bp_ctxt.oldkey = ptr;
2027                 path[level].bp_ctxt.newkey = blocknr;
2028                 path[level].bp_ctxt.bh = *bh;
2029                 ret = nilfs_btnode_prepare_change_key(
2030                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2031                         &path[level].bp_ctxt);
2032                 if (ret < 0)
2033                         return ret;
2034                 nilfs_btnode_commit_change_key(
2035                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2036                         &path[level].bp_ctxt);
2037                 *bh = path[level].bp_ctxt.bh;
2038         }
2039
2040         nilfs_btree_node_set_ptr(btree, parent,
2041                                  path[level + 1].bp_index, blocknr);
2042
2043         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2044         /* on-disk format */
2045         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2046         binfo->bi_dat.bi_level = level;
2047
2048         return 0;
2049 }
2050
2051 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2052                                 struct nilfs_btree_path *path,
2053                                 int level,
2054                                 struct buffer_head **bh,
2055                                 sector_t blocknr,
2056                                 union nilfs_binfo *binfo)
2057 {
2058         struct nilfs_btree_node *parent;
2059         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2060         __u64 key;
2061         __u64 ptr;
2062         union nilfs_bmap_ptr_req req;
2063         int ret;
2064
2065         parent = nilfs_btree_get_node(btree, path, level + 1);
2066         ptr = nilfs_btree_node_get_ptr(btree, parent,
2067                                        path[level + 1].bp_index);
2068         req.bpr_ptr = ptr;
2069         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2070         if (ret < 0)
2071                 return ret;
2072         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2073
2074         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2075         /* on-disk format */
2076         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2077         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2078
2079         return 0;
2080 }
2081
2082 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2083                               struct buffer_head **bh,
2084                               sector_t blocknr,
2085                               union nilfs_binfo *binfo)
2086 {
2087         struct nilfs_btree *btree;
2088         struct nilfs_btree_path *path;
2089         struct nilfs_btree_node *node;
2090         __u64 key;
2091         int level, ret;
2092
2093         btree = (struct nilfs_btree *)bmap;
2094         path = nilfs_btree_alloc_path();
2095         if (path == NULL)
2096                 return -ENOMEM;
2097         nilfs_btree_init_path(path);
2098
2099         if (buffer_nilfs_node(*bh)) {
2100                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2101                 key = nilfs_btree_node_get_key(node, 0);
2102                 level = nilfs_btree_node_get_level(node);
2103         } else {
2104                 key = nilfs_bmap_data_get_key(bmap, *bh);
2105                 level = NILFS_BTREE_LEVEL_DATA;
2106         }
2107
2108         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2109         if (ret < 0) {
2110                 WARN_ON(ret == -ENOENT);
2111                 goto out;
2112         }
2113
2114         ret = NILFS_BMAP_USE_VBN(bmap) ?
2115                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2116                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2117
2118  out:
2119         nilfs_btree_release_path(path);
2120         nilfs_btree_free_path(path);
2121
2122         return ret;
2123 }
2124
2125 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2126                                  struct buffer_head **bh,
2127                                  sector_t blocknr,
2128                                  union nilfs_binfo *binfo)
2129 {
2130         struct nilfs_btree_node *node;
2131         __u64 key;
2132         int ret;
2133
2134         ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2135                              blocknr);
2136         if (ret < 0)
2137                 return ret;
2138
2139         if (buffer_nilfs_node(*bh)) {
2140                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2141                 key = nilfs_btree_node_get_key(node, 0);
2142         } else
2143                 key = nilfs_bmap_data_get_key(bmap, *bh);
2144
2145         /* on-disk format */
2146         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2147         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2148
2149         return 0;
2150 }
2151
2152 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2153 {
2154         struct buffer_head *bh;
2155         struct nilfs_btree *btree;
2156         struct nilfs_btree_path *path;
2157         __u64 ptr;
2158         int ret;
2159
2160         btree = (struct nilfs_btree *)bmap;
2161         path = nilfs_btree_alloc_path();
2162         if (path == NULL)
2163                 return -ENOMEM;
2164         nilfs_btree_init_path(path);
2165
2166         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2167         if (ret < 0) {
2168                 WARN_ON(ret == -ENOENT);
2169                 goto out;
2170         }
2171         ret = nilfs_btree_get_block(btree, ptr, &bh);
2172         if (ret < 0) {
2173                 WARN_ON(ret == -ENOENT);
2174                 goto out;
2175         }
2176
2177         if (!buffer_dirty(bh))
2178                 nilfs_btnode_mark_dirty(bh);
2179         brelse(bh);
2180         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2181                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2182
2183  out:
2184         nilfs_btree_release_path(path);
2185         nilfs_btree_free_path(path);
2186         return ret;
2187 }
2188
2189 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2190         .bop_lookup             =       nilfs_btree_lookup,
2191         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2192         .bop_insert             =       nilfs_btree_insert,
2193         .bop_delete             =       nilfs_btree_delete,
2194         .bop_clear              =       NULL,
2195
2196         .bop_propagate          =       nilfs_btree_propagate,
2197
2198         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2199
2200         .bop_assign             =       nilfs_btree_assign,
2201         .bop_mark               =       nilfs_btree_mark,
2202
2203         .bop_last_key           =       nilfs_btree_last_key,
2204         .bop_check_insert       =       NULL,
2205         .bop_check_delete       =       nilfs_btree_check_delete,
2206         .bop_gather_data        =       nilfs_btree_gather_data,
2207 };
2208
2209 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2210         .bop_lookup             =       NULL,
2211         .bop_lookup_contig      =       NULL,
2212         .bop_insert             =       NULL,
2213         .bop_delete             =       NULL,
2214         .bop_clear              =       NULL,
2215
2216         .bop_propagate          =       nilfs_btree_propagate_gc,
2217
2218         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2219
2220         .bop_assign             =       nilfs_btree_assign_gc,
2221         .bop_mark               =       NULL,
2222
2223         .bop_last_key           =       NULL,
2224         .bop_check_insert       =       NULL,
2225         .bop_check_delete       =       NULL,
2226         .bop_gather_data        =       NULL,
2227 };
2228
2229 int nilfs_btree_init(struct nilfs_bmap *bmap)
2230 {
2231         bmap->b_ops = &nilfs_btree_ops;
2232         return 0;
2233 }
2234
2235 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2236 {
2237         bmap->b_ops = &nilfs_btree_ops_gc;
2238 }