Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
[safe/jmp/linux-2.6] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 /**
35  * struct nilfs_btree_path - A path on which B-tree operations are executed
36  * @bp_bh: buffer head of node block
37  * @bp_sib_bh: buffer head of sibling node block
38  * @bp_index: index of child node
39  * @bp_oldreq: ptr end request for old ptr
40  * @bp_newreq: ptr alloc request for new ptr
41  * @bp_op: rebalance operation
42  */
43 struct nilfs_btree_path {
44         struct buffer_head *bp_bh;
45         struct buffer_head *bp_sib_bh;
46         int bp_index;
47         union nilfs_bmap_ptr_req bp_oldreq;
48         union nilfs_bmap_ptr_req bp_newreq;
49         struct nilfs_btnode_chkey_ctxt bp_ctxt;
50         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51                       int, __u64 *, __u64 *);
52 };
53
54 /*
55  * B-tree path operations
56  */
57
58 static struct kmem_cache *nilfs_btree_path_cache;
59
60 int __init nilfs_btree_path_cache_init(void)
61 {
62         nilfs_btree_path_cache =
63                 kmem_cache_create("nilfs2_btree_path_cache",
64                                   sizeof(struct nilfs_btree_path) *
65                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67 }
68
69 void nilfs_btree_path_cache_destroy(void)
70 {
71         kmem_cache_destroy(nilfs_btree_path_cache);
72 }
73
74 static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75 {
76         return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
77 }
78
79 static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
80 {
81         kmem_cache_free(nilfs_btree_path_cache, path);
82 }
83
84 static void nilfs_btree_init_path(struct nilfs_btree_path *path)
85 {
86         int level;
87
88         for (level = NILFS_BTREE_LEVEL_DATA;
89              level < NILFS_BTREE_LEVEL_MAX;
90              level++) {
91                 path[level].bp_bh = NULL;
92                 path[level].bp_sib_bh = NULL;
93                 path[level].bp_index = 0;
94                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
95                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
96                 path[level].bp_op = NULL;
97         }
98 }
99
100 static void nilfs_btree_release_path(struct nilfs_btree_path *path)
101 {
102         int level;
103
104         for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
105              level++)
106                 brelse(path[level].bp_bh);
107 }
108
109 /*
110  * B-tree node operations
111  */
112 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
113                                  struct buffer_head **bhp)
114 {
115         struct address_space *btnc =
116                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
117         return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
118 }
119
120 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
121                                      __u64 ptr, struct buffer_head **bhp)
122 {
123         struct address_space *btnc =
124                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
125         int ret;
126
127         ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
128         if (!ret)
129                 set_buffer_nilfs_volatile(*bhp);
130         return ret;
131 }
132
133 static inline int
134 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
135 {
136         return node->bn_flags;
137 }
138
139 static inline void
140 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
141 {
142         node->bn_flags = flags;
143 }
144
145 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
146 {
147         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
148 }
149
150 static inline int
151 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
152 {
153         return node->bn_level;
154 }
155
156 static inline void
157 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
158 {
159         node->bn_level = level;
160 }
161
162 static inline int
163 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
164 {
165         return le16_to_cpu(node->bn_nchildren);
166 }
167
168 static inline void
169 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
170 {
171         node->bn_nchildren = cpu_to_le16(nchildren);
172 }
173
174 static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
175 {
176         return 1 << btree->bt_bmap.b_inode->i_blkbits;
177 }
178
179 static inline int
180 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
181                                const struct nilfs_btree *btree)
182 {
183         return nilfs_btree_node_root(node) ?
184                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
185                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
186 }
187
188 static inline int
189 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
190                                const struct nilfs_btree *btree)
191 {
192         return nilfs_btree_node_root(node) ?
193                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
194                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
195 }
196
197 static inline __le64 *
198 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
199 {
200         return (__le64 *)((char *)(node + 1) +
201                           (nilfs_btree_node_root(node) ?
202                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
203 }
204
205 static inline __le64 *
206 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
207                        const struct nilfs_btree *btree)
208 {
209         return (__le64 *)(nilfs_btree_node_dkeys(node) +
210                           nilfs_btree_node_nchildren_max(node, btree));
211 }
212
213 static inline __u64
214 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
215 {
216         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
217 }
218
219 static inline void
220 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
221 {
222         *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
223 }
224
225 static inline __u64
226 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
227                          const struct nilfs_btree_node *node, int index)
228 {
229         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
230                                         index));
231 }
232
233 static inline void
234 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
235                          struct nilfs_btree_node *node, int index, __u64 ptr)
236 {
237         *(nilfs_btree_node_dptrs(node, btree) + index) =
238                 nilfs_bmap_ptr_to_dptr(ptr);
239 }
240
241 static void nilfs_btree_node_init(struct nilfs_btree *btree,
242                                   struct nilfs_btree_node *node,
243                                   int flags, int level, int nchildren,
244                                   const __u64 *keys, const __u64 *ptrs)
245 {
246         __le64 *dkeys;
247         __le64 *dptrs;
248         int i;
249
250         nilfs_btree_node_set_flags(node, flags);
251         nilfs_btree_node_set_level(node, level);
252         nilfs_btree_node_set_nchildren(node, nchildren);
253
254         dkeys = nilfs_btree_node_dkeys(node);
255         dptrs = nilfs_btree_node_dptrs(node, btree);
256         for (i = 0; i < nchildren; i++) {
257                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
258                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
259         }
260 }
261
262 /* Assume the buffer heads corresponding to left and right are locked. */
263 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
264                                        struct nilfs_btree_node *left,
265                                        struct nilfs_btree_node *right,
266                                        int n)
267 {
268         __le64 *ldkeys, *rdkeys;
269         __le64 *ldptrs, *rdptrs;
270         int lnchildren, rnchildren;
271
272         ldkeys = nilfs_btree_node_dkeys(left);
273         ldptrs = nilfs_btree_node_dptrs(left, btree);
274         lnchildren = nilfs_btree_node_get_nchildren(left);
275
276         rdkeys = nilfs_btree_node_dkeys(right);
277         rdptrs = nilfs_btree_node_dptrs(right, btree);
278         rnchildren = nilfs_btree_node_get_nchildren(right);
279
280         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
281         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
282         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
283         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
284
285         lnchildren += n;
286         rnchildren -= n;
287         nilfs_btree_node_set_nchildren(left, lnchildren);
288         nilfs_btree_node_set_nchildren(right, rnchildren);
289 }
290
291 /* Assume that the buffer heads corresponding to left and right are locked. */
292 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
293                                         struct nilfs_btree_node *left,
294                                         struct nilfs_btree_node *right,
295                                         int n)
296 {
297         __le64 *ldkeys, *rdkeys;
298         __le64 *ldptrs, *rdptrs;
299         int lnchildren, rnchildren;
300
301         ldkeys = nilfs_btree_node_dkeys(left);
302         ldptrs = nilfs_btree_node_dptrs(left, btree);
303         lnchildren = nilfs_btree_node_get_nchildren(left);
304
305         rdkeys = nilfs_btree_node_dkeys(right);
306         rdptrs = nilfs_btree_node_dptrs(right, btree);
307         rnchildren = nilfs_btree_node_get_nchildren(right);
308
309         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
310         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
311         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
312         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
313
314         lnchildren -= n;
315         rnchildren += n;
316         nilfs_btree_node_set_nchildren(left, lnchildren);
317         nilfs_btree_node_set_nchildren(right, rnchildren);
318 }
319
320 /* Assume that the buffer head corresponding to node is locked. */
321 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
322                                     struct nilfs_btree_node *node,
323                                     __u64 key, __u64 ptr, int index)
324 {
325         __le64 *dkeys;
326         __le64 *dptrs;
327         int nchildren;
328
329         dkeys = nilfs_btree_node_dkeys(node);
330         dptrs = nilfs_btree_node_dptrs(node, btree);
331         nchildren = nilfs_btree_node_get_nchildren(node);
332         if (index < nchildren) {
333                 memmove(dkeys + index + 1, dkeys + index,
334                         (nchildren - index) * sizeof(*dkeys));
335                 memmove(dptrs + index + 1, dptrs + index,
336                         (nchildren - index) * sizeof(*dptrs));
337         }
338         dkeys[index] = nilfs_bmap_key_to_dkey(key);
339         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
340         nchildren++;
341         nilfs_btree_node_set_nchildren(node, nchildren);
342 }
343
344 /* Assume that the buffer head corresponding to node is locked. */
345 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
346                                     struct nilfs_btree_node *node,
347                                     __u64 *keyp, __u64 *ptrp, int index)
348 {
349         __u64 key;
350         __u64 ptr;
351         __le64 *dkeys;
352         __le64 *dptrs;
353         int nchildren;
354
355         dkeys = nilfs_btree_node_dkeys(node);
356         dptrs = nilfs_btree_node_dptrs(node, btree);
357         key = nilfs_bmap_dkey_to_key(dkeys[index]);
358         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
359         nchildren = nilfs_btree_node_get_nchildren(node);
360         if (keyp != NULL)
361                 *keyp = key;
362         if (ptrp != NULL)
363                 *ptrp = ptr;
364
365         if (index < nchildren - 1) {
366                 memmove(dkeys + index, dkeys + index + 1,
367                         (nchildren - index - 1) * sizeof(*dkeys));
368                 memmove(dptrs + index, dptrs + index + 1,
369                         (nchildren - index - 1) * sizeof(*dptrs));
370         }
371         nchildren--;
372         nilfs_btree_node_set_nchildren(node, nchildren);
373 }
374
375 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
376                                    __u64 key, int *indexp)
377 {
378         __u64 nkey;
379         int index, low, high, s;
380
381         /* binary search */
382         low = 0;
383         high = nilfs_btree_node_get_nchildren(node) - 1;
384         index = 0;
385         s = 0;
386         while (low <= high) {
387                 index = (low + high) / 2;
388                 nkey = nilfs_btree_node_get_key(node, index);
389                 if (nkey == key) {
390                         s = 0;
391                         goto out;
392                 } else if (nkey < key) {
393                         low = index + 1;
394                         s = -1;
395                 } else {
396                         high = index - 1;
397                         s = 1;
398                 }
399         }
400
401         /* adjust index */
402         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
403                 if (s > 0 && index > 0)
404                         index--;
405         } else if (s < 0)
406                 index++;
407
408  out:
409         *indexp = index;
410
411         return s == 0;
412 }
413
414 static inline struct nilfs_btree_node *
415 nilfs_btree_get_root(const struct nilfs_btree *btree)
416 {
417         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
418 }
419
420 static inline struct nilfs_btree_node *
421 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
422 {
423         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
424 }
425
426 static inline struct nilfs_btree_node *
427 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
428 {
429         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
430 }
431
432 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
433 {
434         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
435 }
436
437 static inline struct nilfs_btree_node *
438 nilfs_btree_get_node(const struct nilfs_btree *btree,
439                      const struct nilfs_btree_path *path,
440                      int level)
441 {
442         return (level == nilfs_btree_height(btree) - 1) ?
443                 nilfs_btree_get_root(btree) :
444                 nilfs_btree_get_nonroot_node(path, level);
445 }
446
447 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
448                                  struct nilfs_btree_path *path,
449                                  __u64 key, __u64 *ptrp, int minlevel)
450 {
451         struct nilfs_btree_node *node;
452         __u64 ptr;
453         int level, index, found, ret;
454
455         node = nilfs_btree_get_root(btree);
456         level = nilfs_btree_node_get_level(node);
457         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
458                 return -ENOENT;
459
460         found = nilfs_btree_node_lookup(node, key, &index);
461         ptr = nilfs_btree_node_get_ptr(btree, node, index);
462         path[level].bp_bh = NULL;
463         path[level].bp_index = index;
464
465         for (level--; level >= minlevel; level--) {
466                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
467                 if (ret < 0)
468                         return ret;
469                 node = nilfs_btree_get_nonroot_node(path, level);
470                 BUG_ON(level != nilfs_btree_node_get_level(node));
471                 if (!found)
472                         found = nilfs_btree_node_lookup(node, key, &index);
473                 else
474                         index = 0;
475                 if (index < nilfs_btree_node_nchildren_max(node, btree))
476                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
477                 else {
478                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
479                         /* insert */
480                         ptr = NILFS_BMAP_INVALID_PTR;
481                 }
482                 path[level].bp_index = index;
483         }
484         if (!found)
485                 return -ENOENT;
486
487         if (ptrp != NULL)
488                 *ptrp = ptr;
489
490         return 0;
491 }
492
493 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
494                                       struct nilfs_btree_path *path,
495                                       __u64 *keyp, __u64 *ptrp)
496 {
497         struct nilfs_btree_node *node;
498         __u64 ptr;
499         int index, level, ret;
500
501         node = nilfs_btree_get_root(btree);
502         index = nilfs_btree_node_get_nchildren(node) - 1;
503         if (index < 0)
504                 return -ENOENT;
505         level = nilfs_btree_node_get_level(node);
506         ptr = nilfs_btree_node_get_ptr(btree, node, index);
507         path[level].bp_bh = NULL;
508         path[level].bp_index = index;
509
510         for (level--; level > 0; level--) {
511                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
512                 if (ret < 0)
513                         return ret;
514                 node = nilfs_btree_get_nonroot_node(path, level);
515                 BUG_ON(level != nilfs_btree_node_get_level(node));
516                 index = nilfs_btree_node_get_nchildren(node) - 1;
517                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
518                 path[level].bp_index = index;
519         }
520
521         if (keyp != NULL)
522                 *keyp = nilfs_btree_node_get_key(node, index);
523         if (ptrp != NULL)
524                 *ptrp = ptr;
525
526         return 0;
527 }
528
529 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
530                               __u64 key, int level, __u64 *ptrp)
531 {
532         struct nilfs_btree *btree;
533         struct nilfs_btree_path *path;
534         __u64 ptr;
535         int ret;
536
537         btree = (struct nilfs_btree *)bmap;
538         path = nilfs_btree_alloc_path();
539         if (path == NULL)
540                 return -ENOMEM;
541         nilfs_btree_init_path(path);
542
543         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
544
545         if (ptrp != NULL)
546                 *ptrp = ptr;
547
548         nilfs_btree_release_path(path);
549         nilfs_btree_free_path(path);
550
551         return ret;
552 }
553
554 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
555                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
556 {
557         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
558         struct nilfs_btree_path *path;
559         struct nilfs_btree_node *node;
560         struct inode *dat = NULL;
561         __u64 ptr, ptr2;
562         sector_t blocknr;
563         int level = NILFS_BTREE_LEVEL_NODE_MIN;
564         int ret, cnt, index, maxlevel;
565
566         path = nilfs_btree_alloc_path();
567         if (path == NULL)
568                 return -ENOMEM;
569         nilfs_btree_init_path(path);
570         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
571         if (ret < 0)
572                 goto out;
573
574         if (NILFS_BMAP_USE_VBN(bmap)) {
575                 dat = nilfs_bmap_get_dat(bmap);
576                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
577                 if (ret < 0)
578                         goto out;
579                 ptr = blocknr;
580         }
581         cnt = 1;
582         if (cnt == maxblocks)
583                 goto end;
584
585         maxlevel = nilfs_btree_height(btree) - 1;
586         node = nilfs_btree_get_node(btree, path, level);
587         index = path[level].bp_index + 1;
588         for (;;) {
589                 while (index < nilfs_btree_node_get_nchildren(node)) {
590                         if (nilfs_btree_node_get_key(node, index) !=
591                             key + cnt)
592                                 goto end;
593                         ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
594                         if (dat) {
595                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
596                                 if (ret < 0)
597                                         goto out;
598                                 ptr2 = blocknr;
599                         }
600                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
601                                 goto end;
602                         index++;
603                         continue;
604                 }
605                 if (level == maxlevel)
606                         break;
607
608                 /* look-up right sibling node */
609                 node = nilfs_btree_get_node(btree, path, level + 1);
610                 index = path[level + 1].bp_index + 1;
611                 if (index >= nilfs_btree_node_get_nchildren(node) ||
612                     nilfs_btree_node_get_key(node, index) != key + cnt)
613                         break;
614                 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
615                 path[level + 1].bp_index = index;
616
617                 brelse(path[level].bp_bh);
618                 path[level].bp_bh = NULL;
619                 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
620                 if (ret < 0)
621                         goto out;
622                 node = nilfs_btree_get_nonroot_node(path, level);
623                 index = 0;
624                 path[level].bp_index = index;
625         }
626  end:
627         *ptrp = ptr;
628         ret = cnt;
629  out:
630         nilfs_btree_release_path(path);
631         nilfs_btree_free_path(path);
632         return ret;
633 }
634
635 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
636                                     struct nilfs_btree_path *path,
637                                     int level, __u64 key)
638 {
639         if (level < nilfs_btree_height(btree) - 1) {
640                 do {
641                         lock_buffer(path[level].bp_bh);
642                         nilfs_btree_node_set_key(
643                                 nilfs_btree_get_nonroot_node(path, level),
644                                 path[level].bp_index, key);
645                         if (!buffer_dirty(path[level].bp_bh))
646                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
647                         unlock_buffer(path[level].bp_bh);
648                 } while ((path[level].bp_index == 0) &&
649                          (++level < nilfs_btree_height(btree) - 1));
650         }
651
652         /* root */
653         if (level == nilfs_btree_height(btree) - 1) {
654                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
655                                          path[level].bp_index, key);
656         }
657 }
658
659 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
660                                   struct nilfs_btree_path *path,
661                                   int level, __u64 *keyp, __u64 *ptrp)
662 {
663         struct nilfs_btree_node *node;
664
665         if (level < nilfs_btree_height(btree) - 1) {
666                 lock_buffer(path[level].bp_bh);
667                 node = nilfs_btree_get_nonroot_node(path, level);
668                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
669                                         path[level].bp_index);
670                 if (!buffer_dirty(path[level].bp_bh))
671                         nilfs_btnode_mark_dirty(path[level].bp_bh);
672                 unlock_buffer(path[level].bp_bh);
673
674                 if (path[level].bp_index == 0)
675                         nilfs_btree_promote_key(btree, path, level + 1,
676                                                 nilfs_btree_node_get_key(node,
677                                                                          0));
678         } else {
679                 node = nilfs_btree_get_root(btree);
680                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
681                                         path[level].bp_index);
682         }
683 }
684
685 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
686                                    struct nilfs_btree_path *path,
687                                    int level, __u64 *keyp, __u64 *ptrp)
688 {
689         struct nilfs_btree_node *node, *left;
690         int nchildren, lnchildren, n, move;
691
692         lock_buffer(path[level].bp_bh);
693         lock_buffer(path[level].bp_sib_bh);
694
695         node = nilfs_btree_get_nonroot_node(path, level);
696         left = nilfs_btree_get_sib_node(path, level);
697         nchildren = nilfs_btree_node_get_nchildren(node);
698         lnchildren = nilfs_btree_node_get_nchildren(left);
699         move = 0;
700
701         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
702         if (n > path[level].bp_index) {
703                 /* move insert point */
704                 n--;
705                 move = 1;
706         }
707
708         nilfs_btree_node_move_left(btree, left, node, n);
709
710         if (!buffer_dirty(path[level].bp_bh))
711                 nilfs_btnode_mark_dirty(path[level].bp_bh);
712         if (!buffer_dirty(path[level].bp_sib_bh))
713                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
714
715         unlock_buffer(path[level].bp_bh);
716         unlock_buffer(path[level].bp_sib_bh);
717
718         nilfs_btree_promote_key(btree, path, level + 1,
719                                 nilfs_btree_node_get_key(node, 0));
720
721         if (move) {
722                 brelse(path[level].bp_bh);
723                 path[level].bp_bh = path[level].bp_sib_bh;
724                 path[level].bp_sib_bh = NULL;
725                 path[level].bp_index += lnchildren;
726                 path[level + 1].bp_index--;
727         } else {
728                 brelse(path[level].bp_sib_bh);
729                 path[level].bp_sib_bh = NULL;
730                 path[level].bp_index -= n;
731         }
732
733         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
734 }
735
736 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
737                                     struct nilfs_btree_path *path,
738                                     int level, __u64 *keyp, __u64 *ptrp)
739 {
740         struct nilfs_btree_node *node, *right;
741         int nchildren, rnchildren, n, move;
742
743         lock_buffer(path[level].bp_bh);
744         lock_buffer(path[level].bp_sib_bh);
745
746         node = nilfs_btree_get_nonroot_node(path, level);
747         right = nilfs_btree_get_sib_node(path, level);
748         nchildren = nilfs_btree_node_get_nchildren(node);
749         rnchildren = nilfs_btree_node_get_nchildren(right);
750         move = 0;
751
752         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
753         if (n > nchildren - path[level].bp_index) {
754                 /* move insert point */
755                 n--;
756                 move = 1;
757         }
758
759         nilfs_btree_node_move_right(btree, node, right, n);
760
761         if (!buffer_dirty(path[level].bp_bh))
762                 nilfs_btnode_mark_dirty(path[level].bp_bh);
763         if (!buffer_dirty(path[level].bp_sib_bh))
764                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
765
766         unlock_buffer(path[level].bp_bh);
767         unlock_buffer(path[level].bp_sib_bh);
768
769         path[level + 1].bp_index++;
770         nilfs_btree_promote_key(btree, path, level + 1,
771                                 nilfs_btree_node_get_key(right, 0));
772         path[level + 1].bp_index--;
773
774         if (move) {
775                 brelse(path[level].bp_bh);
776                 path[level].bp_bh = path[level].bp_sib_bh;
777                 path[level].bp_sib_bh = NULL;
778                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
779                 path[level + 1].bp_index++;
780         } else {
781                 brelse(path[level].bp_sib_bh);
782                 path[level].bp_sib_bh = NULL;
783         }
784
785         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
786 }
787
788 static void nilfs_btree_split(struct nilfs_btree *btree,
789                               struct nilfs_btree_path *path,
790                               int level, __u64 *keyp, __u64 *ptrp)
791 {
792         struct nilfs_btree_node *node, *right;
793         __u64 newkey;
794         __u64 newptr;
795         int nchildren, n, move;
796
797         lock_buffer(path[level].bp_bh);
798         lock_buffer(path[level].bp_sib_bh);
799
800         node = nilfs_btree_get_nonroot_node(path, level);
801         right = nilfs_btree_get_sib_node(path, level);
802         nchildren = nilfs_btree_node_get_nchildren(node);
803         move = 0;
804
805         n = (nchildren + 1) / 2;
806         if (n > nchildren - path[level].bp_index) {
807                 n--;
808                 move = 1;
809         }
810
811         nilfs_btree_node_move_right(btree, node, right, n);
812
813         if (!buffer_dirty(path[level].bp_bh))
814                 nilfs_btnode_mark_dirty(path[level].bp_bh);
815         if (!buffer_dirty(path[level].bp_sib_bh))
816                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
817
818         unlock_buffer(path[level].bp_bh);
819         unlock_buffer(path[level].bp_sib_bh);
820
821         newkey = nilfs_btree_node_get_key(right, 0);
822         newptr = path[level].bp_newreq.bpr_ptr;
823
824         if (move) {
825                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
826                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
827                                         path[level].bp_index);
828
829                 *keyp = nilfs_btree_node_get_key(right, 0);
830                 *ptrp = path[level].bp_newreq.bpr_ptr;
831
832                 brelse(path[level].bp_bh);
833                 path[level].bp_bh = path[level].bp_sib_bh;
834                 path[level].bp_sib_bh = NULL;
835         } else {
836                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
837
838                 *keyp = nilfs_btree_node_get_key(right, 0);
839                 *ptrp = path[level].bp_newreq.bpr_ptr;
840
841                 brelse(path[level].bp_sib_bh);
842                 path[level].bp_sib_bh = NULL;
843         }
844
845         path[level + 1].bp_index++;
846 }
847
848 static void nilfs_btree_grow(struct nilfs_btree *btree,
849                              struct nilfs_btree_path *path,
850                              int level, __u64 *keyp, __u64 *ptrp)
851 {
852         struct nilfs_btree_node *root, *child;
853         int n;
854
855         lock_buffer(path[level].bp_sib_bh);
856
857         root = nilfs_btree_get_root(btree);
858         child = nilfs_btree_get_sib_node(path, level);
859
860         n = nilfs_btree_node_get_nchildren(root);
861
862         nilfs_btree_node_move_right(btree, root, child, n);
863         nilfs_btree_node_set_level(root, level + 1);
864
865         if (!buffer_dirty(path[level].bp_sib_bh))
866                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
867
868         unlock_buffer(path[level].bp_sib_bh);
869
870         path[level].bp_bh = path[level].bp_sib_bh;
871         path[level].bp_sib_bh = NULL;
872
873         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
874
875         *keyp = nilfs_btree_node_get_key(child, 0);
876         *ptrp = path[level].bp_newreq.bpr_ptr;
877 }
878
879 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
880                                    const struct nilfs_btree_path *path)
881 {
882         struct nilfs_btree_node *node;
883         int level;
884
885         if (path == NULL)
886                 return NILFS_BMAP_INVALID_PTR;
887
888         /* left sibling */
889         level = NILFS_BTREE_LEVEL_NODE_MIN;
890         if (path[level].bp_index > 0) {
891                 node = nilfs_btree_get_node(btree, path, level);
892                 return nilfs_btree_node_get_ptr(btree, node,
893                                                 path[level].bp_index - 1);
894         }
895
896         /* parent */
897         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
898         if (level <= nilfs_btree_height(btree) - 1) {
899                 node = nilfs_btree_get_node(btree, path, level);
900                 return nilfs_btree_node_get_ptr(btree, node,
901                                                 path[level].bp_index);
902         }
903
904         return NILFS_BMAP_INVALID_PTR;
905 }
906
907 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
908                                        const struct nilfs_btree_path *path,
909                                        __u64 key)
910 {
911         __u64 ptr;
912
913         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
914         if (ptr != NILFS_BMAP_INVALID_PTR)
915                 /* sequential access */
916                 return ptr;
917         else {
918                 ptr = nilfs_btree_find_near(btree, path);
919                 if (ptr != NILFS_BMAP_INVALID_PTR)
920                         /* near */
921                         return ptr;
922         }
923         /* block group */
924         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
925 }
926
927 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
928                                      __u64 ptr)
929 {
930         btree->bt_bmap.b_last_allocated_key = key;
931         btree->bt_bmap.b_last_allocated_ptr = ptr;
932 }
933
934 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
935                                       struct nilfs_btree_path *path,
936                                       int *levelp, __u64 key, __u64 ptr,
937                                       struct nilfs_bmap_stats *stats)
938 {
939         struct buffer_head *bh;
940         struct nilfs_btree_node *node, *parent, *sib;
941         __u64 sibptr;
942         int pindex, level, ret;
943         struct inode *dat = NULL;
944
945         stats->bs_nblocks = 0;
946         level = NILFS_BTREE_LEVEL_DATA;
947
948         /* allocate a new ptr for data block */
949         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
950                 path[level].bp_newreq.bpr_ptr =
951                         nilfs_btree_find_target_v(btree, path, key);
952                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
953         }
954
955         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
956                                            &path[level].bp_newreq, dat);
957         if (ret < 0)
958                 goto err_out_data;
959
960         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
961              level < nilfs_btree_height(btree) - 1;
962              level++) {
963                 node = nilfs_btree_get_nonroot_node(path, level);
964                 if (nilfs_btree_node_get_nchildren(node) <
965                     nilfs_btree_node_nchildren_max(node, btree)) {
966                         path[level].bp_op = nilfs_btree_do_insert;
967                         stats->bs_nblocks++;
968                         goto out;
969                 }
970
971                 parent = nilfs_btree_get_node(btree, path, level + 1);
972                 pindex = path[level + 1].bp_index;
973
974                 /* left sibling */
975                 if (pindex > 0) {
976                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
977                                                           pindex - 1);
978                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
979                         if (ret < 0)
980                                 goto err_out_child_node;
981                         sib = (struct nilfs_btree_node *)bh->b_data;
982                         if (nilfs_btree_node_get_nchildren(sib) <
983                             nilfs_btree_node_nchildren_max(sib, btree)) {
984                                 path[level].bp_sib_bh = bh;
985                                 path[level].bp_op = nilfs_btree_carry_left;
986                                 stats->bs_nblocks++;
987                                 goto out;
988                         } else
989                                 brelse(bh);
990                 }
991
992                 /* right sibling */
993                 if (pindex <
994                     nilfs_btree_node_get_nchildren(parent) - 1) {
995                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
996                                                           pindex + 1);
997                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
998                         if (ret < 0)
999                                 goto err_out_child_node;
1000                         sib = (struct nilfs_btree_node *)bh->b_data;
1001                         if (nilfs_btree_node_get_nchildren(sib) <
1002                             nilfs_btree_node_nchildren_max(sib, btree)) {
1003                                 path[level].bp_sib_bh = bh;
1004                                 path[level].bp_op = nilfs_btree_carry_right;
1005                                 stats->bs_nblocks++;
1006                                 goto out;
1007                         } else
1008                                 brelse(bh);
1009                 }
1010
1011                 /* split */
1012                 path[level].bp_newreq.bpr_ptr =
1013                         path[level - 1].bp_newreq.bpr_ptr + 1;
1014                 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1015                                                    &path[level].bp_newreq, dat);
1016                 if (ret < 0)
1017                         goto err_out_child_node;
1018                 ret = nilfs_btree_get_new_block(btree,
1019                                                 path[level].bp_newreq.bpr_ptr,
1020                                                 &bh);
1021                 if (ret < 0)
1022                         goto err_out_curr_node;
1023
1024                 stats->bs_nblocks++;
1025
1026                 lock_buffer(bh);
1027                 nilfs_btree_node_init(btree,
1028                                       (struct nilfs_btree_node *)bh->b_data,
1029                                       0, level, 0, NULL, NULL);
1030                 unlock_buffer(bh);
1031                 path[level].bp_sib_bh = bh;
1032                 path[level].bp_op = nilfs_btree_split;
1033         }
1034
1035         /* root */
1036         node = nilfs_btree_get_root(btree);
1037         if (nilfs_btree_node_get_nchildren(node) <
1038             nilfs_btree_node_nchildren_max(node, btree)) {
1039                 path[level].bp_op = nilfs_btree_do_insert;
1040                 stats->bs_nblocks++;
1041                 goto out;
1042         }
1043
1044         /* grow */
1045         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1046         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1047                                            &path[level].bp_newreq, dat);
1048         if (ret < 0)
1049                 goto err_out_child_node;
1050         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1051                                         &bh);
1052         if (ret < 0)
1053                 goto err_out_curr_node;
1054
1055         lock_buffer(bh);
1056         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1057                               0, level, 0, NULL, NULL);
1058         unlock_buffer(bh);
1059         path[level].bp_sib_bh = bh;
1060         path[level].bp_op = nilfs_btree_grow;
1061
1062         level++;
1063         path[level].bp_op = nilfs_btree_do_insert;
1064
1065         /* a newly-created node block and a data block are added */
1066         stats->bs_nblocks += 2;
1067
1068         /* success */
1069  out:
1070         *levelp = level;
1071         return ret;
1072
1073         /* error */
1074  err_out_curr_node:
1075         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1076                                    dat);
1077  err_out_child_node:
1078         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1079                 nilfs_btnode_delete(path[level].bp_sib_bh);
1080                 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1081                                            &path[level].bp_newreq, dat);
1082
1083         }
1084
1085         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1086                                    dat);
1087  err_out_data:
1088         *levelp = level;
1089         stats->bs_nblocks = 0;
1090         return ret;
1091 }
1092
1093 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1094                                       struct nilfs_btree_path *path,
1095                                       int maxlevel, __u64 key, __u64 ptr)
1096 {
1097         struct inode *dat = NULL;
1098         int level;
1099
1100         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1101         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1102         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1103                 nilfs_btree_set_target_v(btree, key, ptr);
1104                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1105         }
1106
1107         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1108                 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1109                                             &path[level - 1].bp_newreq, dat);
1110                 path[level].bp_op(btree, path, level, &key, &ptr);
1111         }
1112
1113         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1114                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1115 }
1116
1117 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1118 {
1119         struct nilfs_btree *btree;
1120         struct nilfs_btree_path *path;
1121         struct nilfs_bmap_stats stats;
1122         int level, ret;
1123
1124         btree = (struct nilfs_btree *)bmap;
1125         path = nilfs_btree_alloc_path();
1126         if (path == NULL)
1127                 return -ENOMEM;
1128         nilfs_btree_init_path(path);
1129
1130         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1131                                     NILFS_BTREE_LEVEL_NODE_MIN);
1132         if (ret != -ENOENT) {
1133                 if (ret == 0)
1134                         ret = -EEXIST;
1135                 goto out;
1136         }
1137
1138         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1139         if (ret < 0)
1140                 goto out;
1141         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1142         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1143
1144  out:
1145         nilfs_btree_release_path(path);
1146         nilfs_btree_free_path(path);
1147         return ret;
1148 }
1149
1150 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1151                                   struct nilfs_btree_path *path,
1152                                   int level, __u64 *keyp, __u64 *ptrp)
1153 {
1154         struct nilfs_btree_node *node;
1155
1156         if (level < nilfs_btree_height(btree) - 1) {
1157                 lock_buffer(path[level].bp_bh);
1158                 node = nilfs_btree_get_nonroot_node(path, level);
1159                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1160                                         path[level].bp_index);
1161                 if (!buffer_dirty(path[level].bp_bh))
1162                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1163                 unlock_buffer(path[level].bp_bh);
1164                 if (path[level].bp_index == 0)
1165                         nilfs_btree_promote_key(btree, path, level + 1,
1166                                 nilfs_btree_node_get_key(node, 0));
1167         } else {
1168                 node = nilfs_btree_get_root(btree);
1169                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1170                                         path[level].bp_index);
1171         }
1172 }
1173
1174 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1175                                     struct nilfs_btree_path *path,
1176                                     int level, __u64 *keyp, __u64 *ptrp)
1177 {
1178         struct nilfs_btree_node *node, *left;
1179         int nchildren, lnchildren, n;
1180
1181         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1182
1183         lock_buffer(path[level].bp_bh);
1184         lock_buffer(path[level].bp_sib_bh);
1185
1186         node = nilfs_btree_get_nonroot_node(path, level);
1187         left = nilfs_btree_get_sib_node(path, level);
1188         nchildren = nilfs_btree_node_get_nchildren(node);
1189         lnchildren = nilfs_btree_node_get_nchildren(left);
1190
1191         n = (nchildren + lnchildren) / 2 - nchildren;
1192
1193         nilfs_btree_node_move_right(btree, left, node, n);
1194
1195         if (!buffer_dirty(path[level].bp_bh))
1196                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1197         if (!buffer_dirty(path[level].bp_sib_bh))
1198                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1199
1200         unlock_buffer(path[level].bp_bh);
1201         unlock_buffer(path[level].bp_sib_bh);
1202
1203         nilfs_btree_promote_key(btree, path, level + 1,
1204                                 nilfs_btree_node_get_key(node, 0));
1205
1206         brelse(path[level].bp_sib_bh);
1207         path[level].bp_sib_bh = NULL;
1208         path[level].bp_index += n;
1209 }
1210
1211 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1212                                      struct nilfs_btree_path *path,
1213                                      int level, __u64 *keyp, __u64 *ptrp)
1214 {
1215         struct nilfs_btree_node *node, *right;
1216         int nchildren, rnchildren, n;
1217
1218         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1219
1220         lock_buffer(path[level].bp_bh);
1221         lock_buffer(path[level].bp_sib_bh);
1222
1223         node = nilfs_btree_get_nonroot_node(path, level);
1224         right = nilfs_btree_get_sib_node(path, level);
1225         nchildren = nilfs_btree_node_get_nchildren(node);
1226         rnchildren = nilfs_btree_node_get_nchildren(right);
1227
1228         n = (nchildren + rnchildren) / 2 - nchildren;
1229
1230         nilfs_btree_node_move_left(btree, node, right, n);
1231
1232         if (!buffer_dirty(path[level].bp_bh))
1233                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1234         if (!buffer_dirty(path[level].bp_sib_bh))
1235                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1236
1237         unlock_buffer(path[level].bp_bh);
1238         unlock_buffer(path[level].bp_sib_bh);
1239
1240         path[level + 1].bp_index++;
1241         nilfs_btree_promote_key(btree, path, level + 1,
1242                                 nilfs_btree_node_get_key(right, 0));
1243         path[level + 1].bp_index--;
1244
1245         brelse(path[level].bp_sib_bh);
1246         path[level].bp_sib_bh = NULL;
1247 }
1248
1249 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1250                                     struct nilfs_btree_path *path,
1251                                     int level, __u64 *keyp, __u64 *ptrp)
1252 {
1253         struct nilfs_btree_node *node, *left;
1254         int n;
1255
1256         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1257
1258         lock_buffer(path[level].bp_bh);
1259         lock_buffer(path[level].bp_sib_bh);
1260
1261         node = nilfs_btree_get_nonroot_node(path, level);
1262         left = nilfs_btree_get_sib_node(path, level);
1263
1264         n = nilfs_btree_node_get_nchildren(node);
1265
1266         nilfs_btree_node_move_left(btree, left, node, n);
1267
1268         if (!buffer_dirty(path[level].bp_sib_bh))
1269                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1270
1271         unlock_buffer(path[level].bp_bh);
1272         unlock_buffer(path[level].bp_sib_bh);
1273
1274         nilfs_btnode_delete(path[level].bp_bh);
1275         path[level].bp_bh = path[level].bp_sib_bh;
1276         path[level].bp_sib_bh = NULL;
1277         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1278 }
1279
1280 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1281                                      struct nilfs_btree_path *path,
1282                                      int level, __u64 *keyp, __u64 *ptrp)
1283 {
1284         struct nilfs_btree_node *node, *right;
1285         int n;
1286
1287         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1288
1289         lock_buffer(path[level].bp_bh);
1290         lock_buffer(path[level].bp_sib_bh);
1291
1292         node = nilfs_btree_get_nonroot_node(path, level);
1293         right = nilfs_btree_get_sib_node(path, level);
1294
1295         n = nilfs_btree_node_get_nchildren(right);
1296
1297         nilfs_btree_node_move_left(btree, node, right, n);
1298
1299         if (!buffer_dirty(path[level].bp_bh))
1300                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1301
1302         unlock_buffer(path[level].bp_bh);
1303         unlock_buffer(path[level].bp_sib_bh);
1304
1305         nilfs_btnode_delete(path[level].bp_sib_bh);
1306         path[level].bp_sib_bh = NULL;
1307         path[level + 1].bp_index++;
1308 }
1309
1310 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1311                                struct nilfs_btree_path *path,
1312                                int level, __u64 *keyp, __u64 *ptrp)
1313 {
1314         struct nilfs_btree_node *root, *child;
1315         int n;
1316
1317         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1318
1319         lock_buffer(path[level].bp_bh);
1320         root = nilfs_btree_get_root(btree);
1321         child = nilfs_btree_get_nonroot_node(path, level);
1322
1323         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1324         nilfs_btree_node_set_level(root, level);
1325         n = nilfs_btree_node_get_nchildren(child);
1326         nilfs_btree_node_move_left(btree, root, child, n);
1327         unlock_buffer(path[level].bp_bh);
1328
1329         nilfs_btnode_delete(path[level].bp_bh);
1330         path[level].bp_bh = NULL;
1331 }
1332
1333
1334 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1335                                       struct nilfs_btree_path *path,
1336                                       int *levelp,
1337                                       struct nilfs_bmap_stats *stats,
1338                                       struct inode *dat)
1339 {
1340         struct buffer_head *bh;
1341         struct nilfs_btree_node *node, *parent, *sib;
1342         __u64 sibptr;
1343         int pindex, level, ret;
1344
1345         ret = 0;
1346         stats->bs_nblocks = 0;
1347         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1348              level < nilfs_btree_height(btree) - 1;
1349              level++) {
1350                 node = nilfs_btree_get_nonroot_node(path, level);
1351                 path[level].bp_oldreq.bpr_ptr =
1352                         nilfs_btree_node_get_ptr(btree, node,
1353                                                  path[level].bp_index);
1354                 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1355                                                  &path[level].bp_oldreq, dat);
1356                 if (ret < 0)
1357                         goto err_out_child_node;
1358
1359                 if (nilfs_btree_node_get_nchildren(node) >
1360                     nilfs_btree_node_nchildren_min(node, btree)) {
1361                         path[level].bp_op = nilfs_btree_do_delete;
1362                         stats->bs_nblocks++;
1363                         goto out;
1364                 }
1365
1366                 parent = nilfs_btree_get_node(btree, path, level + 1);
1367                 pindex = path[level + 1].bp_index;
1368
1369                 if (pindex > 0) {
1370                         /* left sibling */
1371                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1372                                                           pindex - 1);
1373                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1374                         if (ret < 0)
1375                                 goto err_out_curr_node;
1376                         sib = (struct nilfs_btree_node *)bh->b_data;
1377                         if (nilfs_btree_node_get_nchildren(sib) >
1378                             nilfs_btree_node_nchildren_min(sib, btree)) {
1379                                 path[level].bp_sib_bh = bh;
1380                                 path[level].bp_op = nilfs_btree_borrow_left;
1381                                 stats->bs_nblocks++;
1382                                 goto out;
1383                         } else {
1384                                 path[level].bp_sib_bh = bh;
1385                                 path[level].bp_op = nilfs_btree_concat_left;
1386                                 stats->bs_nblocks++;
1387                                 /* continue; */
1388                         }
1389                 } else if (pindex <
1390                            nilfs_btree_node_get_nchildren(parent) - 1) {
1391                         /* right sibling */
1392                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1393                                                           pindex + 1);
1394                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1395                         if (ret < 0)
1396                                 goto err_out_curr_node;
1397                         sib = (struct nilfs_btree_node *)bh->b_data;
1398                         if (nilfs_btree_node_get_nchildren(sib) >
1399                             nilfs_btree_node_nchildren_min(sib, btree)) {
1400                                 path[level].bp_sib_bh = bh;
1401                                 path[level].bp_op = nilfs_btree_borrow_right;
1402                                 stats->bs_nblocks++;
1403                                 goto out;
1404                         } else {
1405                                 path[level].bp_sib_bh = bh;
1406                                 path[level].bp_op = nilfs_btree_concat_right;
1407                                 stats->bs_nblocks++;
1408                                 /* continue; */
1409                         }
1410                 } else {
1411                         /* no siblings */
1412                         /* the only child of the root node */
1413                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1414                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1415                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1416                                 path[level].bp_op = nilfs_btree_shrink;
1417                                 stats->bs_nblocks += 2;
1418                         } else {
1419                                 path[level].bp_op = nilfs_btree_do_delete;
1420                                 stats->bs_nblocks++;
1421                         }
1422
1423                         goto out;
1424
1425                 }
1426         }
1427
1428         node = nilfs_btree_get_root(btree);
1429         path[level].bp_oldreq.bpr_ptr =
1430                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1431
1432         ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1433                                          &path[level].bp_oldreq, dat);
1434         if (ret < 0)
1435                 goto err_out_child_node;
1436
1437         /* child of the root node is deleted */
1438         path[level].bp_op = nilfs_btree_do_delete;
1439         stats->bs_nblocks++;
1440
1441         /* success */
1442  out:
1443         *levelp = level;
1444         return ret;
1445
1446         /* error */
1447  err_out_curr_node:
1448         nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1449  err_out_child_node:
1450         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1451                 brelse(path[level].bp_sib_bh);
1452                 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1453                                          &path[level].bp_oldreq, dat);
1454         }
1455         *levelp = level;
1456         stats->bs_nblocks = 0;
1457         return ret;
1458 }
1459
1460 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1461                                       struct nilfs_btree_path *path,
1462                                       int maxlevel, struct inode *dat)
1463 {
1464         int level;
1465
1466         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1467                 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1468                                           &path[level].bp_oldreq, dat);
1469                 path[level].bp_op(btree, path, level, NULL, NULL);
1470         }
1471
1472         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1473                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1474 }
1475
1476 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1477
1478 {
1479         struct nilfs_btree *btree;
1480         struct nilfs_btree_path *path;
1481         struct nilfs_bmap_stats stats;
1482         struct inode *dat;
1483         int level, ret;
1484
1485         btree = (struct nilfs_btree *)bmap;
1486         path = nilfs_btree_alloc_path();
1487         if (path == NULL)
1488                 return -ENOMEM;
1489         nilfs_btree_init_path(path);
1490         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1491                                     NILFS_BTREE_LEVEL_NODE_MIN);
1492         if (ret < 0)
1493                 goto out;
1494
1495
1496         dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1497                 nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1498
1499         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1500         if (ret < 0)
1501                 goto out;
1502         nilfs_btree_commit_delete(btree, path, level, dat);
1503         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1504
1505 out:
1506         nilfs_btree_release_path(path);
1507         nilfs_btree_free_path(path);
1508         return ret;
1509 }
1510
1511 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1512 {
1513         struct nilfs_btree *btree;
1514         struct nilfs_btree_path *path;
1515         int ret;
1516
1517         btree = (struct nilfs_btree *)bmap;
1518         path = nilfs_btree_alloc_path();
1519         if (path == NULL)
1520                 return -ENOMEM;
1521         nilfs_btree_init_path(path);
1522
1523         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1524
1525         nilfs_btree_release_path(path);
1526         nilfs_btree_free_path(path);
1527
1528         return ret;
1529 }
1530
1531 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1532 {
1533         struct buffer_head *bh;
1534         struct nilfs_btree *btree;
1535         struct nilfs_btree_node *root, *node;
1536         __u64 maxkey, nextmaxkey;
1537         __u64 ptr;
1538         int nchildren, ret;
1539
1540         btree = (struct nilfs_btree *)bmap;
1541         root = nilfs_btree_get_root(btree);
1542         switch (nilfs_btree_height(btree)) {
1543         case 2:
1544                 bh = NULL;
1545                 node = root;
1546                 break;
1547         case 3:
1548                 nchildren = nilfs_btree_node_get_nchildren(root);
1549                 if (nchildren > 1)
1550                         return 0;
1551                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1552                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1553                 if (ret < 0)
1554                         return ret;
1555                 node = (struct nilfs_btree_node *)bh->b_data;
1556                 break;
1557         default:
1558                 return 0;
1559         }
1560
1561         nchildren = nilfs_btree_node_get_nchildren(node);
1562         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1563         nextmaxkey = (nchildren > 1) ?
1564                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1565         if (bh != NULL)
1566                 brelse(bh);
1567
1568         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1569 }
1570
1571 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1572                                    __u64 *keys, __u64 *ptrs, int nitems)
1573 {
1574         struct buffer_head *bh;
1575         struct nilfs_btree *btree;
1576         struct nilfs_btree_node *node, *root;
1577         __le64 *dkeys;
1578         __le64 *dptrs;
1579         __u64 ptr;
1580         int nchildren, i, ret;
1581
1582         btree = (struct nilfs_btree *)bmap;
1583         root = nilfs_btree_get_root(btree);
1584         switch (nilfs_btree_height(btree)) {
1585         case 2:
1586                 bh = NULL;
1587                 node = root;
1588                 break;
1589         case 3:
1590                 nchildren = nilfs_btree_node_get_nchildren(root);
1591                 WARN_ON(nchildren > 1);
1592                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1593                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1594                 if (ret < 0)
1595                         return ret;
1596                 node = (struct nilfs_btree_node *)bh->b_data;
1597                 break;
1598         default:
1599                 node = NULL;
1600                 return -EINVAL;
1601         }
1602
1603         nchildren = nilfs_btree_node_get_nchildren(node);
1604         if (nchildren < nitems)
1605                 nitems = nchildren;
1606         dkeys = nilfs_btree_node_dkeys(node);
1607         dptrs = nilfs_btree_node_dptrs(node, btree);
1608         for (i = 0; i < nitems; i++) {
1609                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1610                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1611         }
1612
1613         if (bh != NULL)
1614                 brelse(bh);
1615
1616         return nitems;
1617 }
1618
1619 static int
1620 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1621                                        union nilfs_bmap_ptr_req *dreq,
1622                                        union nilfs_bmap_ptr_req *nreq,
1623                                        struct buffer_head **bhp,
1624                                        struct nilfs_bmap_stats *stats)
1625 {
1626         struct buffer_head *bh;
1627         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1628         struct inode *dat = NULL;
1629         int ret;
1630
1631         stats->bs_nblocks = 0;
1632
1633         /* for data */
1634         /* cannot find near ptr */
1635         if (NILFS_BMAP_USE_VBN(bmap)) {
1636                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1637                 dat = nilfs_bmap_get_dat(bmap);
1638         }
1639
1640         ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1641         if (ret < 0)
1642                 return ret;
1643
1644         *bhp = NULL;
1645         stats->bs_nblocks++;
1646         if (nreq != NULL) {
1647                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1648                 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1649                 if (ret < 0)
1650                         goto err_out_dreq;
1651
1652                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1653                 if (ret < 0)
1654                         goto err_out_nreq;
1655
1656                 *bhp = bh;
1657                 stats->bs_nblocks++;
1658         }
1659
1660         /* success */
1661         return 0;
1662
1663         /* error */
1664  err_out_nreq:
1665         nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1666  err_out_dreq:
1667         nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1668         stats->bs_nblocks = 0;
1669         return ret;
1670
1671 }
1672
1673 static void
1674 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1675                                       __u64 key, __u64 ptr,
1676                                       const __u64 *keys, const __u64 *ptrs,
1677                                       int n,
1678                                       union nilfs_bmap_ptr_req *dreq,
1679                                       union nilfs_bmap_ptr_req *nreq,
1680                                       struct buffer_head *bh)
1681 {
1682         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1683         struct nilfs_btree_node *node;
1684         struct inode *dat;
1685         __u64 tmpptr;
1686
1687         /* free resources */
1688         if (bmap->b_ops->bop_clear != NULL)
1689                 bmap->b_ops->bop_clear(bmap);
1690
1691         /* ptr must be a pointer to a buffer head. */
1692         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1693
1694         /* convert and insert */
1695         dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1696         nilfs_btree_init(bmap);
1697         if (nreq != NULL) {
1698                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1699                 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1700
1701                 /* create child node at level 1 */
1702                 lock_buffer(bh);
1703                 node = (struct nilfs_btree_node *)bh->b_data;
1704                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1705                 nilfs_btree_node_insert(btree, node,
1706                                         key, dreq->bpr_ptr, n);
1707                 if (!buffer_dirty(bh))
1708                         nilfs_btnode_mark_dirty(bh);
1709                 if (!nilfs_bmap_dirty(bmap))
1710                         nilfs_bmap_set_dirty(bmap);
1711
1712                 unlock_buffer(bh);
1713                 brelse(bh);
1714
1715                 /* create root node at level 2 */
1716                 node = nilfs_btree_get_root(btree);
1717                 tmpptr = nreq->bpr_ptr;
1718                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1719                                       2, 1, &keys[0], &tmpptr);
1720         } else {
1721                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1722
1723                 /* create root node at level 1 */
1724                 node = nilfs_btree_get_root(btree);
1725                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1726                                       1, n, keys, ptrs);
1727                 nilfs_btree_node_insert(btree, node,
1728                                         key, dreq->bpr_ptr, n);
1729                 if (!nilfs_bmap_dirty(bmap))
1730                         nilfs_bmap_set_dirty(bmap);
1731         }
1732
1733         if (NILFS_BMAP_USE_VBN(bmap))
1734                 nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1735 }
1736
1737 /**
1738  * nilfs_btree_convert_and_insert -
1739  * @bmap:
1740  * @key:
1741  * @ptr:
1742  * @keys:
1743  * @ptrs:
1744  * @n:
1745  */
1746 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1747                                    __u64 key, __u64 ptr,
1748                                    const __u64 *keys, const __u64 *ptrs, int n)
1749 {
1750         struct buffer_head *bh;
1751         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1752         struct nilfs_bmap_stats stats;
1753         int ret;
1754
1755         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1756                 di = &dreq;
1757                 ni = NULL;
1758         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1759                            1 << bmap->b_inode->i_blkbits)) {
1760                 di = &dreq;
1761                 ni = &nreq;
1762         } else {
1763                 di = NULL;
1764                 ni = NULL;
1765                 BUG();
1766         }
1767
1768         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1769                                                      &stats);
1770         if (ret < 0)
1771                 return ret;
1772         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1773                                               di, ni, bh);
1774         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1775         return 0;
1776 }
1777
1778 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1779                                    struct nilfs_btree_path *path,
1780                                    int level,
1781                                    struct buffer_head *bh)
1782 {
1783         while ((++level < nilfs_btree_height(btree) - 1) &&
1784                !buffer_dirty(path[level].bp_bh))
1785                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1786
1787         return 0;
1788 }
1789
1790 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1791                                         struct nilfs_btree_path *path,
1792                                         int level, struct inode *dat)
1793 {
1794         struct nilfs_btree_node *parent;
1795         int ret;
1796
1797         parent = nilfs_btree_get_node(btree, path, level + 1);
1798         path[level].bp_oldreq.bpr_ptr =
1799                 nilfs_btree_node_get_ptr(btree, parent,
1800                                          path[level + 1].bp_index);
1801         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1802         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1803                                        &path[level].bp_newreq.bpr_req);
1804         if (ret < 0)
1805                 return ret;
1806
1807         if (buffer_nilfs_node(path[level].bp_bh)) {
1808                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1809                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1810                 path[level].bp_ctxt.bh = path[level].bp_bh;
1811                 ret = nilfs_btnode_prepare_change_key(
1812                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1813                         &path[level].bp_ctxt);
1814                 if (ret < 0) {
1815                         nilfs_dat_abort_update(dat,
1816                                                &path[level].bp_oldreq.bpr_req,
1817                                                &path[level].bp_newreq.bpr_req);
1818                         return ret;
1819                 }
1820         }
1821
1822         return 0;
1823 }
1824
1825 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1826                                         struct nilfs_btree_path *path,
1827                                         int level, struct inode *dat)
1828 {
1829         struct nilfs_btree_node *parent;
1830
1831         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1832                                 &path[level].bp_newreq.bpr_req,
1833                                 btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1834
1835         if (buffer_nilfs_node(path[level].bp_bh)) {
1836                 nilfs_btnode_commit_change_key(
1837                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1838                         &path[level].bp_ctxt);
1839                 path[level].bp_bh = path[level].bp_ctxt.bh;
1840         }
1841         set_buffer_nilfs_volatile(path[level].bp_bh);
1842
1843         parent = nilfs_btree_get_node(btree, path, level + 1);
1844         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1845                                  path[level].bp_newreq.bpr_ptr);
1846 }
1847
1848 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1849                                        struct nilfs_btree_path *path,
1850                                        int level, struct inode *dat)
1851 {
1852         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1853                                &path[level].bp_newreq.bpr_req);
1854         if (buffer_nilfs_node(path[level].bp_bh))
1855                 nilfs_btnode_abort_change_key(
1856                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1857                         &path[level].bp_ctxt);
1858 }
1859
1860 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1861                                            struct nilfs_btree_path *path,
1862                                            int minlevel, int *maxlevelp,
1863                                            struct inode *dat)
1864 {
1865         int level, ret;
1866
1867         level = minlevel;
1868         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1869                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1870                 if (ret < 0)
1871                         return ret;
1872         }
1873         while ((++level < nilfs_btree_height(btree) - 1) &&
1874                !buffer_dirty(path[level].bp_bh)) {
1875
1876                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1877                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1878                 if (ret < 0)
1879                         goto out;
1880         }
1881
1882         /* success */
1883         *maxlevelp = level - 1;
1884         return 0;
1885
1886         /* error */
1887  out:
1888         while (--level > minlevel)
1889                 nilfs_btree_abort_update_v(btree, path, level, dat);
1890         if (!buffer_nilfs_volatile(path[level].bp_bh))
1891                 nilfs_btree_abort_update_v(btree, path, level, dat);
1892         return ret;
1893 }
1894
1895 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1896                                            struct nilfs_btree_path *path,
1897                                            int minlevel, int maxlevel,
1898                                            struct buffer_head *bh,
1899                                            struct inode *dat)
1900 {
1901         int level;
1902
1903         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1904                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1905
1906         for (level = minlevel + 1; level <= maxlevel; level++)
1907                 nilfs_btree_commit_update_v(btree, path, level, dat);
1908 }
1909
1910 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1911                                    struct nilfs_btree_path *path,
1912                                    int level, struct buffer_head *bh)
1913 {
1914         int maxlevel, ret;
1915         struct nilfs_btree_node *parent;
1916         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1917         __u64 ptr;
1918
1919         get_bh(bh);
1920         path[level].bp_bh = bh;
1921         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1922                                               dat);
1923         if (ret < 0)
1924                 goto out;
1925
1926         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1927                 parent = nilfs_btree_get_node(btree, path, level + 1);
1928                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1929                                                path[level + 1].bp_index);
1930                 ret = nilfs_dat_mark_dirty(dat, ptr);
1931                 if (ret < 0)
1932                         goto out;
1933         }
1934
1935         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1936
1937  out:
1938         brelse(path[level].bp_bh);
1939         path[level].bp_bh = NULL;
1940         return ret;
1941 }
1942
1943 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1944                                  struct buffer_head *bh)
1945 {
1946         struct nilfs_btree *btree;
1947         struct nilfs_btree_path *path;
1948         struct nilfs_btree_node *node;
1949         __u64 key;
1950         int level, ret;
1951
1952         WARN_ON(!buffer_dirty(bh));
1953
1954         btree = (struct nilfs_btree *)bmap;
1955         path = nilfs_btree_alloc_path();
1956         if (path == NULL)
1957                 return -ENOMEM;
1958         nilfs_btree_init_path(path);
1959
1960         if (buffer_nilfs_node(bh)) {
1961                 node = (struct nilfs_btree_node *)bh->b_data;
1962                 key = nilfs_btree_node_get_key(node, 0);
1963                 level = nilfs_btree_node_get_level(node);
1964         } else {
1965                 key = nilfs_bmap_data_get_key(bmap, bh);
1966                 level = NILFS_BTREE_LEVEL_DATA;
1967         }
1968
1969         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1970         if (ret < 0) {
1971                 if (unlikely(ret == -ENOENT))
1972                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1973                                __func__, (unsigned long long)key, level);
1974                 goto out;
1975         }
1976
1977         ret = NILFS_BMAP_USE_VBN(bmap) ?
1978                 nilfs_btree_propagate_v(btree, path, level, bh) :
1979                 nilfs_btree_propagate_p(btree, path, level, bh);
1980
1981  out:
1982         nilfs_btree_release_path(path);
1983         nilfs_btree_free_path(path);
1984
1985         return ret;
1986 }
1987
1988 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1989                                     struct buffer_head *bh)
1990 {
1991         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
1992 }
1993
1994 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1995                                          struct list_head *lists,
1996                                          struct buffer_head *bh)
1997 {
1998         struct list_head *head;
1999         struct buffer_head *cbh;
2000         struct nilfs_btree_node *node, *cnode;
2001         __u64 key, ckey;
2002         int level;
2003
2004         get_bh(bh);
2005         node = (struct nilfs_btree_node *)bh->b_data;
2006         key = nilfs_btree_node_get_key(node, 0);
2007         level = nilfs_btree_node_get_level(node);
2008         list_for_each(head, &lists[level]) {
2009                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2010                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2011                 ckey = nilfs_btree_node_get_key(cnode, 0);
2012                 if (key < ckey)
2013                         break;
2014         }
2015         list_add_tail(&bh->b_assoc_buffers, head);
2016 }
2017
2018 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
2019                                              struct list_head *listp)
2020 {
2021         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
2022         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
2023         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2024         struct pagevec pvec;
2025         struct buffer_head *bh, *head;
2026         pgoff_t index = 0;
2027         int level, i;
2028
2029         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2030              level < NILFS_BTREE_LEVEL_MAX;
2031              level++)
2032                 INIT_LIST_HEAD(&lists[level]);
2033
2034         pagevec_init(&pvec, 0);
2035
2036         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2037                                   PAGEVEC_SIZE)) {
2038                 for (i = 0; i < pagevec_count(&pvec); i++) {
2039                         bh = head = page_buffers(pvec.pages[i]);
2040                         do {
2041                                 if (buffer_dirty(bh))
2042                                         nilfs_btree_add_dirty_buffer(btree,
2043                                                                      lists, bh);
2044                         } while ((bh = bh->b_this_page) != head);
2045                 }
2046                 pagevec_release(&pvec);
2047                 cond_resched();
2048         }
2049
2050         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2051              level < NILFS_BTREE_LEVEL_MAX;
2052              level++)
2053                 list_splice(&lists[level], listp->prev);
2054 }
2055
2056 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2057                                 struct nilfs_btree_path *path,
2058                                 int level,
2059                                 struct buffer_head **bh,
2060                                 sector_t blocknr,
2061                                 union nilfs_binfo *binfo)
2062 {
2063         struct nilfs_btree_node *parent;
2064         __u64 key;
2065         __u64 ptr;
2066         int ret;
2067
2068         parent = nilfs_btree_get_node(btree, path, level + 1);
2069         ptr = nilfs_btree_node_get_ptr(btree, parent,
2070                                        path[level + 1].bp_index);
2071         if (buffer_nilfs_node(*bh)) {
2072                 path[level].bp_ctxt.oldkey = ptr;
2073                 path[level].bp_ctxt.newkey = blocknr;
2074                 path[level].bp_ctxt.bh = *bh;
2075                 ret = nilfs_btnode_prepare_change_key(
2076                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2077                         &path[level].bp_ctxt);
2078                 if (ret < 0)
2079                         return ret;
2080                 nilfs_btnode_commit_change_key(
2081                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2082                         &path[level].bp_ctxt);
2083                 *bh = path[level].bp_ctxt.bh;
2084         }
2085
2086         nilfs_btree_node_set_ptr(btree, parent,
2087                                  path[level + 1].bp_index, blocknr);
2088
2089         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2090         /* on-disk format */
2091         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2092         binfo->bi_dat.bi_level = level;
2093
2094         return 0;
2095 }
2096
2097 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2098                                 struct nilfs_btree_path *path,
2099                                 int level,
2100                                 struct buffer_head **bh,
2101                                 sector_t blocknr,
2102                                 union nilfs_binfo *binfo)
2103 {
2104         struct nilfs_btree_node *parent;
2105         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2106         __u64 key;
2107         __u64 ptr;
2108         union nilfs_bmap_ptr_req req;
2109         int ret;
2110
2111         parent = nilfs_btree_get_node(btree, path, level + 1);
2112         ptr = nilfs_btree_node_get_ptr(btree, parent,
2113                                        path[level + 1].bp_index);
2114         req.bpr_ptr = ptr;
2115         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2116         if (ret < 0)
2117                 return ret;
2118         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2119
2120         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2121         /* on-disk format */
2122         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2123         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2124
2125         return 0;
2126 }
2127
2128 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2129                               struct buffer_head **bh,
2130                               sector_t blocknr,
2131                               union nilfs_binfo *binfo)
2132 {
2133         struct nilfs_btree *btree;
2134         struct nilfs_btree_path *path;
2135         struct nilfs_btree_node *node;
2136         __u64 key;
2137         int level, ret;
2138
2139         btree = (struct nilfs_btree *)bmap;
2140         path = nilfs_btree_alloc_path();
2141         if (path == NULL)
2142                 return -ENOMEM;
2143         nilfs_btree_init_path(path);
2144
2145         if (buffer_nilfs_node(*bh)) {
2146                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2147                 key = nilfs_btree_node_get_key(node, 0);
2148                 level = nilfs_btree_node_get_level(node);
2149         } else {
2150                 key = nilfs_bmap_data_get_key(bmap, *bh);
2151                 level = NILFS_BTREE_LEVEL_DATA;
2152         }
2153
2154         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2155         if (ret < 0) {
2156                 WARN_ON(ret == -ENOENT);
2157                 goto out;
2158         }
2159
2160         ret = NILFS_BMAP_USE_VBN(bmap) ?
2161                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2162                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2163
2164  out:
2165         nilfs_btree_release_path(path);
2166         nilfs_btree_free_path(path);
2167
2168         return ret;
2169 }
2170
2171 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2172                                  struct buffer_head **bh,
2173                                  sector_t blocknr,
2174                                  union nilfs_binfo *binfo)
2175 {
2176         struct nilfs_btree_node *node;
2177         __u64 key;
2178         int ret;
2179
2180         ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2181                              blocknr);
2182         if (ret < 0)
2183                 return ret;
2184
2185         if (buffer_nilfs_node(*bh)) {
2186                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2187                 key = nilfs_btree_node_get_key(node, 0);
2188         } else
2189                 key = nilfs_bmap_data_get_key(bmap, *bh);
2190
2191         /* on-disk format */
2192         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2193         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2194
2195         return 0;
2196 }
2197
2198 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2199 {
2200         struct buffer_head *bh;
2201         struct nilfs_btree *btree;
2202         struct nilfs_btree_path *path;
2203         __u64 ptr;
2204         int ret;
2205
2206         btree = (struct nilfs_btree *)bmap;
2207         path = nilfs_btree_alloc_path();
2208         if (path == NULL)
2209                 return -ENOMEM;
2210         nilfs_btree_init_path(path);
2211
2212         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2213         if (ret < 0) {
2214                 WARN_ON(ret == -ENOENT);
2215                 goto out;
2216         }
2217         ret = nilfs_btree_get_block(btree, ptr, &bh);
2218         if (ret < 0) {
2219                 WARN_ON(ret == -ENOENT);
2220                 goto out;
2221         }
2222
2223         if (!buffer_dirty(bh))
2224                 nilfs_btnode_mark_dirty(bh);
2225         brelse(bh);
2226         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2227                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2228
2229  out:
2230         nilfs_btree_release_path(path);
2231         nilfs_btree_free_path(path);
2232         return ret;
2233 }
2234
2235 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2236         .bop_lookup             =       nilfs_btree_lookup,
2237         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2238         .bop_insert             =       nilfs_btree_insert,
2239         .bop_delete             =       nilfs_btree_delete,
2240         .bop_clear              =       NULL,
2241
2242         .bop_propagate          =       nilfs_btree_propagate,
2243
2244         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2245
2246         .bop_assign             =       nilfs_btree_assign,
2247         .bop_mark               =       nilfs_btree_mark,
2248
2249         .bop_last_key           =       nilfs_btree_last_key,
2250         .bop_check_insert       =       NULL,
2251         .bop_check_delete       =       nilfs_btree_check_delete,
2252         .bop_gather_data        =       nilfs_btree_gather_data,
2253 };
2254
2255 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2256         .bop_lookup             =       NULL,
2257         .bop_lookup_contig      =       NULL,
2258         .bop_insert             =       NULL,
2259         .bop_delete             =       NULL,
2260         .bop_clear              =       NULL,
2261
2262         .bop_propagate          =       nilfs_btree_propagate_gc,
2263
2264         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2265
2266         .bop_assign             =       nilfs_btree_assign_gc,
2267         .bop_mark               =       NULL,
2268
2269         .bop_last_key           =       NULL,
2270         .bop_check_insert       =       NULL,
2271         .bop_check_delete       =       NULL,
2272         .bop_gather_data        =       NULL,
2273 };
2274
2275 int nilfs_btree_init(struct nilfs_bmap *bmap)
2276 {
2277         bmap->b_ops = &nilfs_btree_ops;
2278         return 0;
2279 }
2280
2281 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2282 {
2283         bmap->b_ops = &nilfs_btree_ops_gc;
2284 }