nilfs2: add update functions of virtual block address to dat
[safe/jmp/linux-2.6] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 /**
35  * struct nilfs_btree_path - A path on which B-tree operations are executed
36  * @bp_bh: buffer head of node block
37  * @bp_sib_bh: buffer head of sibling node block
38  * @bp_index: index of child node
39  * @bp_oldreq: ptr end request for old ptr
40  * @bp_newreq: ptr alloc request for new ptr
41  * @bp_op: rebalance operation
42  */
43 struct nilfs_btree_path {
44         struct buffer_head *bp_bh;
45         struct buffer_head *bp_sib_bh;
46         int bp_index;
47         union nilfs_bmap_ptr_req bp_oldreq;
48         union nilfs_bmap_ptr_req bp_newreq;
49         struct nilfs_btnode_chkey_ctxt bp_ctxt;
50         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51                       int, __u64 *, __u64 *);
52 };
53
54 /*
55  * B-tree path operations
56  */
57
58 static struct kmem_cache *nilfs_btree_path_cache;
59
60 int __init nilfs_btree_path_cache_init(void)
61 {
62         nilfs_btree_path_cache =
63                 kmem_cache_create("nilfs2_btree_path_cache",
64                                   sizeof(struct nilfs_btree_path) *
65                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67 }
68
69 void nilfs_btree_path_cache_destroy(void)
70 {
71         kmem_cache_destroy(nilfs_btree_path_cache);
72 }
73
74 static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75 {
76         return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
77 }
78
79 static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
80 {
81         kmem_cache_free(nilfs_btree_path_cache, path);
82 }
83
84 static void nilfs_btree_init_path(struct nilfs_btree_path *path)
85 {
86         int level;
87
88         for (level = NILFS_BTREE_LEVEL_DATA;
89              level < NILFS_BTREE_LEVEL_MAX;
90              level++) {
91                 path[level].bp_bh = NULL;
92                 path[level].bp_sib_bh = NULL;
93                 path[level].bp_index = 0;
94                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
95                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
96                 path[level].bp_op = NULL;
97         }
98 }
99
100 static void nilfs_btree_release_path(struct nilfs_btree_path *path)
101 {
102         int level;
103
104         for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
105              level++)
106                 brelse(path[level].bp_bh);
107 }
108
109 /*
110  * B-tree node operations
111  */
112 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
113                                  struct buffer_head **bhp)
114 {
115         struct address_space *btnc =
116                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
117         return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
118 }
119
120 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
121                                      __u64 ptr, struct buffer_head **bhp)
122 {
123         struct address_space *btnc =
124                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
125         int ret;
126
127         ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
128         if (!ret)
129                 set_buffer_nilfs_volatile(*bhp);
130         return ret;
131 }
132
133 static inline int
134 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
135 {
136         return node->bn_flags;
137 }
138
139 static inline void
140 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
141 {
142         node->bn_flags = flags;
143 }
144
145 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
146 {
147         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
148 }
149
150 static inline int
151 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
152 {
153         return node->bn_level;
154 }
155
156 static inline void
157 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
158 {
159         node->bn_level = level;
160 }
161
162 static inline int
163 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
164 {
165         return le16_to_cpu(node->bn_nchildren);
166 }
167
168 static inline void
169 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
170 {
171         node->bn_nchildren = cpu_to_le16(nchildren);
172 }
173
174 static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
175 {
176         return 1 << btree->bt_bmap.b_inode->i_blkbits;
177 }
178
179 static inline int
180 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
181                                const struct nilfs_btree *btree)
182 {
183         return nilfs_btree_node_root(node) ?
184                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
185                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
186 }
187
188 static inline int
189 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
190                                const struct nilfs_btree *btree)
191 {
192         return nilfs_btree_node_root(node) ?
193                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
194                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
195 }
196
197 static inline __le64 *
198 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
199 {
200         return (__le64 *)((char *)(node + 1) +
201                           (nilfs_btree_node_root(node) ?
202                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
203 }
204
205 static inline __le64 *
206 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
207                        const struct nilfs_btree *btree)
208 {
209         return (__le64 *)(nilfs_btree_node_dkeys(node) +
210                           nilfs_btree_node_nchildren_max(node, btree));
211 }
212
213 static inline __u64
214 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
215 {
216         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
217 }
218
219 static inline void
220 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
221 {
222         *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
223 }
224
225 static inline __u64
226 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
227                          const struct nilfs_btree_node *node, int index)
228 {
229         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
230                                         index));
231 }
232
233 static inline void
234 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
235                          struct nilfs_btree_node *node, int index, __u64 ptr)
236 {
237         *(nilfs_btree_node_dptrs(node, btree) + index) =
238                 nilfs_bmap_ptr_to_dptr(ptr);
239 }
240
241 static void nilfs_btree_node_init(struct nilfs_btree *btree,
242                                   struct nilfs_btree_node *node,
243                                   int flags, int level, int nchildren,
244                                   const __u64 *keys, const __u64 *ptrs)
245 {
246         __le64 *dkeys;
247         __le64 *dptrs;
248         int i;
249
250         nilfs_btree_node_set_flags(node, flags);
251         nilfs_btree_node_set_level(node, level);
252         nilfs_btree_node_set_nchildren(node, nchildren);
253
254         dkeys = nilfs_btree_node_dkeys(node);
255         dptrs = nilfs_btree_node_dptrs(node, btree);
256         for (i = 0; i < nchildren; i++) {
257                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
258                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
259         }
260 }
261
262 /* Assume the buffer heads corresponding to left and right are locked. */
263 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
264                                        struct nilfs_btree_node *left,
265                                        struct nilfs_btree_node *right,
266                                        int n)
267 {
268         __le64 *ldkeys, *rdkeys;
269         __le64 *ldptrs, *rdptrs;
270         int lnchildren, rnchildren;
271
272         ldkeys = nilfs_btree_node_dkeys(left);
273         ldptrs = nilfs_btree_node_dptrs(left, btree);
274         lnchildren = nilfs_btree_node_get_nchildren(left);
275
276         rdkeys = nilfs_btree_node_dkeys(right);
277         rdptrs = nilfs_btree_node_dptrs(right, btree);
278         rnchildren = nilfs_btree_node_get_nchildren(right);
279
280         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
281         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
282         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
283         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
284
285         lnchildren += n;
286         rnchildren -= n;
287         nilfs_btree_node_set_nchildren(left, lnchildren);
288         nilfs_btree_node_set_nchildren(right, rnchildren);
289 }
290
291 /* Assume that the buffer heads corresponding to left and right are locked. */
292 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
293                                         struct nilfs_btree_node *left,
294                                         struct nilfs_btree_node *right,
295                                         int n)
296 {
297         __le64 *ldkeys, *rdkeys;
298         __le64 *ldptrs, *rdptrs;
299         int lnchildren, rnchildren;
300
301         ldkeys = nilfs_btree_node_dkeys(left);
302         ldptrs = nilfs_btree_node_dptrs(left, btree);
303         lnchildren = nilfs_btree_node_get_nchildren(left);
304
305         rdkeys = nilfs_btree_node_dkeys(right);
306         rdptrs = nilfs_btree_node_dptrs(right, btree);
307         rnchildren = nilfs_btree_node_get_nchildren(right);
308
309         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
310         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
311         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
312         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
313
314         lnchildren -= n;
315         rnchildren += n;
316         nilfs_btree_node_set_nchildren(left, lnchildren);
317         nilfs_btree_node_set_nchildren(right, rnchildren);
318 }
319
320 /* Assume that the buffer head corresponding to node is locked. */
321 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
322                                     struct nilfs_btree_node *node,
323                                     __u64 key, __u64 ptr, int index)
324 {
325         __le64 *dkeys;
326         __le64 *dptrs;
327         int nchildren;
328
329         dkeys = nilfs_btree_node_dkeys(node);
330         dptrs = nilfs_btree_node_dptrs(node, btree);
331         nchildren = nilfs_btree_node_get_nchildren(node);
332         if (index < nchildren) {
333                 memmove(dkeys + index + 1, dkeys + index,
334                         (nchildren - index) * sizeof(*dkeys));
335                 memmove(dptrs + index + 1, dptrs + index,
336                         (nchildren - index) * sizeof(*dptrs));
337         }
338         dkeys[index] = nilfs_bmap_key_to_dkey(key);
339         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
340         nchildren++;
341         nilfs_btree_node_set_nchildren(node, nchildren);
342 }
343
344 /* Assume that the buffer head corresponding to node is locked. */
345 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
346                                     struct nilfs_btree_node *node,
347                                     __u64 *keyp, __u64 *ptrp, int index)
348 {
349         __u64 key;
350         __u64 ptr;
351         __le64 *dkeys;
352         __le64 *dptrs;
353         int nchildren;
354
355         dkeys = nilfs_btree_node_dkeys(node);
356         dptrs = nilfs_btree_node_dptrs(node, btree);
357         key = nilfs_bmap_dkey_to_key(dkeys[index]);
358         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
359         nchildren = nilfs_btree_node_get_nchildren(node);
360         if (keyp != NULL)
361                 *keyp = key;
362         if (ptrp != NULL)
363                 *ptrp = ptr;
364
365         if (index < nchildren - 1) {
366                 memmove(dkeys + index, dkeys + index + 1,
367                         (nchildren - index - 1) * sizeof(*dkeys));
368                 memmove(dptrs + index, dptrs + index + 1,
369                         (nchildren - index - 1) * sizeof(*dptrs));
370         }
371         nchildren--;
372         nilfs_btree_node_set_nchildren(node, nchildren);
373 }
374
375 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
376                                    __u64 key, int *indexp)
377 {
378         __u64 nkey;
379         int index, low, high, s;
380
381         /* binary search */
382         low = 0;
383         high = nilfs_btree_node_get_nchildren(node) - 1;
384         index = 0;
385         s = 0;
386         while (low <= high) {
387                 index = (low + high) / 2;
388                 nkey = nilfs_btree_node_get_key(node, index);
389                 if (nkey == key) {
390                         s = 0;
391                         goto out;
392                 } else if (nkey < key) {
393                         low = index + 1;
394                         s = -1;
395                 } else {
396                         high = index - 1;
397                         s = 1;
398                 }
399         }
400
401         /* adjust index */
402         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
403                 if (s > 0 && index > 0)
404                         index--;
405         } else if (s < 0)
406                 index++;
407
408  out:
409         *indexp = index;
410
411         return s == 0;
412 }
413
414 static inline struct nilfs_btree_node *
415 nilfs_btree_get_root(const struct nilfs_btree *btree)
416 {
417         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
418 }
419
420 static inline struct nilfs_btree_node *
421 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
422 {
423         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
424 }
425
426 static inline struct nilfs_btree_node *
427 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
428 {
429         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
430 }
431
432 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
433 {
434         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
435 }
436
437 static inline struct nilfs_btree_node *
438 nilfs_btree_get_node(const struct nilfs_btree *btree,
439                      const struct nilfs_btree_path *path,
440                      int level)
441 {
442         return (level == nilfs_btree_height(btree) - 1) ?
443                 nilfs_btree_get_root(btree) :
444                 nilfs_btree_get_nonroot_node(path, level);
445 }
446
447 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
448                                  struct nilfs_btree_path *path,
449                                  __u64 key, __u64 *ptrp, int minlevel)
450 {
451         struct nilfs_btree_node *node;
452         __u64 ptr;
453         int level, index, found, ret;
454
455         node = nilfs_btree_get_root(btree);
456         level = nilfs_btree_node_get_level(node);
457         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
458                 return -ENOENT;
459
460         found = nilfs_btree_node_lookup(node, key, &index);
461         ptr = nilfs_btree_node_get_ptr(btree, node, index);
462         path[level].bp_bh = NULL;
463         path[level].bp_index = index;
464
465         for (level--; level >= minlevel; level--) {
466                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
467                 if (ret < 0)
468                         return ret;
469                 node = nilfs_btree_get_nonroot_node(path, level);
470                 BUG_ON(level != nilfs_btree_node_get_level(node));
471                 if (!found)
472                         found = nilfs_btree_node_lookup(node, key, &index);
473                 else
474                         index = 0;
475                 if (index < nilfs_btree_node_nchildren_max(node, btree))
476                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
477                 else {
478                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
479                         /* insert */
480                         ptr = NILFS_BMAP_INVALID_PTR;
481                 }
482                 path[level].bp_index = index;
483         }
484         if (!found)
485                 return -ENOENT;
486
487         if (ptrp != NULL)
488                 *ptrp = ptr;
489
490         return 0;
491 }
492
493 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
494                                       struct nilfs_btree_path *path,
495                                       __u64 *keyp, __u64 *ptrp)
496 {
497         struct nilfs_btree_node *node;
498         __u64 ptr;
499         int index, level, ret;
500
501         node = nilfs_btree_get_root(btree);
502         index = nilfs_btree_node_get_nchildren(node) - 1;
503         if (index < 0)
504                 return -ENOENT;
505         level = nilfs_btree_node_get_level(node);
506         ptr = nilfs_btree_node_get_ptr(btree, node, index);
507         path[level].bp_bh = NULL;
508         path[level].bp_index = index;
509
510         for (level--; level > 0; level--) {
511                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
512                 if (ret < 0)
513                         return ret;
514                 node = nilfs_btree_get_nonroot_node(path, level);
515                 BUG_ON(level != nilfs_btree_node_get_level(node));
516                 index = nilfs_btree_node_get_nchildren(node) - 1;
517                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
518                 path[level].bp_index = index;
519         }
520
521         if (keyp != NULL)
522                 *keyp = nilfs_btree_node_get_key(node, index);
523         if (ptrp != NULL)
524                 *ptrp = ptr;
525
526         return 0;
527 }
528
529 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
530                               __u64 key, int level, __u64 *ptrp)
531 {
532         struct nilfs_btree *btree;
533         struct nilfs_btree_path *path;
534         __u64 ptr;
535         int ret;
536
537         btree = (struct nilfs_btree *)bmap;
538         path = nilfs_btree_alloc_path();
539         if (path == NULL)
540                 return -ENOMEM;
541         nilfs_btree_init_path(path);
542
543         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
544
545         if (ptrp != NULL)
546                 *ptrp = ptr;
547
548         nilfs_btree_release_path(path);
549         nilfs_btree_free_path(path);
550
551         return ret;
552 }
553
554 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
555                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
556 {
557         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
558         struct nilfs_btree_path *path;
559         struct nilfs_btree_node *node;
560         struct inode *dat = NULL;
561         __u64 ptr, ptr2;
562         sector_t blocknr;
563         int level = NILFS_BTREE_LEVEL_NODE_MIN;
564         int ret, cnt, index, maxlevel;
565
566         path = nilfs_btree_alloc_path();
567         if (path == NULL)
568                 return -ENOMEM;
569         nilfs_btree_init_path(path);
570         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
571         if (ret < 0)
572                 goto out;
573
574         if (NILFS_BMAP_USE_VBN(bmap)) {
575                 dat = nilfs_bmap_get_dat(bmap);
576                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
577                 if (ret < 0)
578                         goto out;
579                 ptr = blocknr;
580         }
581         cnt = 1;
582         if (cnt == maxblocks)
583                 goto end;
584
585         maxlevel = nilfs_btree_height(btree) - 1;
586         node = nilfs_btree_get_node(btree, path, level);
587         index = path[level].bp_index + 1;
588         for (;;) {
589                 while (index < nilfs_btree_node_get_nchildren(node)) {
590                         if (nilfs_btree_node_get_key(node, index) !=
591                             key + cnt)
592                                 goto end;
593                         ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
594                         if (dat) {
595                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
596                                 if (ret < 0)
597                                         goto out;
598                                 ptr2 = blocknr;
599                         }
600                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
601                                 goto end;
602                         index++;
603                         continue;
604                 }
605                 if (level == maxlevel)
606                         break;
607
608                 /* look-up right sibling node */
609                 node = nilfs_btree_get_node(btree, path, level + 1);
610                 index = path[level + 1].bp_index + 1;
611                 if (index >= nilfs_btree_node_get_nchildren(node) ||
612                     nilfs_btree_node_get_key(node, index) != key + cnt)
613                         break;
614                 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
615                 path[level + 1].bp_index = index;
616
617                 brelse(path[level].bp_bh);
618                 path[level].bp_bh = NULL;
619                 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
620                 if (ret < 0)
621                         goto out;
622                 node = nilfs_btree_get_nonroot_node(path, level);
623                 index = 0;
624                 path[level].bp_index = index;
625         }
626  end:
627         *ptrp = ptr;
628         ret = cnt;
629  out:
630         nilfs_btree_release_path(path);
631         nilfs_btree_free_path(path);
632         return ret;
633 }
634
635 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
636                                     struct nilfs_btree_path *path,
637                                     int level, __u64 key)
638 {
639         if (level < nilfs_btree_height(btree) - 1) {
640                 do {
641                         lock_buffer(path[level].bp_bh);
642                         nilfs_btree_node_set_key(
643                                 nilfs_btree_get_nonroot_node(path, level),
644                                 path[level].bp_index, key);
645                         if (!buffer_dirty(path[level].bp_bh))
646                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
647                         unlock_buffer(path[level].bp_bh);
648                 } while ((path[level].bp_index == 0) &&
649                          (++level < nilfs_btree_height(btree) - 1));
650         }
651
652         /* root */
653         if (level == nilfs_btree_height(btree) - 1) {
654                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
655                                          path[level].bp_index, key);
656         }
657 }
658
659 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
660                                   struct nilfs_btree_path *path,
661                                   int level, __u64 *keyp, __u64 *ptrp)
662 {
663         struct nilfs_btree_node *node;
664
665         if (level < nilfs_btree_height(btree) - 1) {
666                 lock_buffer(path[level].bp_bh);
667                 node = nilfs_btree_get_nonroot_node(path, level);
668                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
669                                         path[level].bp_index);
670                 if (!buffer_dirty(path[level].bp_bh))
671                         nilfs_btnode_mark_dirty(path[level].bp_bh);
672                 unlock_buffer(path[level].bp_bh);
673
674                 if (path[level].bp_index == 0)
675                         nilfs_btree_promote_key(btree, path, level + 1,
676                                                 nilfs_btree_node_get_key(node,
677                                                                          0));
678         } else {
679                 node = nilfs_btree_get_root(btree);
680                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
681                                         path[level].bp_index);
682         }
683 }
684
685 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
686                                    struct nilfs_btree_path *path,
687                                    int level, __u64 *keyp, __u64 *ptrp)
688 {
689         struct nilfs_btree_node *node, *left;
690         int nchildren, lnchildren, n, move;
691
692         lock_buffer(path[level].bp_bh);
693         lock_buffer(path[level].bp_sib_bh);
694
695         node = nilfs_btree_get_nonroot_node(path, level);
696         left = nilfs_btree_get_sib_node(path, level);
697         nchildren = nilfs_btree_node_get_nchildren(node);
698         lnchildren = nilfs_btree_node_get_nchildren(left);
699         move = 0;
700
701         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
702         if (n > path[level].bp_index) {
703                 /* move insert point */
704                 n--;
705                 move = 1;
706         }
707
708         nilfs_btree_node_move_left(btree, left, node, n);
709
710         if (!buffer_dirty(path[level].bp_bh))
711                 nilfs_btnode_mark_dirty(path[level].bp_bh);
712         if (!buffer_dirty(path[level].bp_sib_bh))
713                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
714
715         unlock_buffer(path[level].bp_bh);
716         unlock_buffer(path[level].bp_sib_bh);
717
718         nilfs_btree_promote_key(btree, path, level + 1,
719                                 nilfs_btree_node_get_key(node, 0));
720
721         if (move) {
722                 brelse(path[level].bp_bh);
723                 path[level].bp_bh = path[level].bp_sib_bh;
724                 path[level].bp_sib_bh = NULL;
725                 path[level].bp_index += lnchildren;
726                 path[level + 1].bp_index--;
727         } else {
728                 brelse(path[level].bp_sib_bh);
729                 path[level].bp_sib_bh = NULL;
730                 path[level].bp_index -= n;
731         }
732
733         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
734 }
735
736 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
737                                     struct nilfs_btree_path *path,
738                                     int level, __u64 *keyp, __u64 *ptrp)
739 {
740         struct nilfs_btree_node *node, *right;
741         int nchildren, rnchildren, n, move;
742
743         lock_buffer(path[level].bp_bh);
744         lock_buffer(path[level].bp_sib_bh);
745
746         node = nilfs_btree_get_nonroot_node(path, level);
747         right = nilfs_btree_get_sib_node(path, level);
748         nchildren = nilfs_btree_node_get_nchildren(node);
749         rnchildren = nilfs_btree_node_get_nchildren(right);
750         move = 0;
751
752         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
753         if (n > nchildren - path[level].bp_index) {
754                 /* move insert point */
755                 n--;
756                 move = 1;
757         }
758
759         nilfs_btree_node_move_right(btree, node, right, n);
760
761         if (!buffer_dirty(path[level].bp_bh))
762                 nilfs_btnode_mark_dirty(path[level].bp_bh);
763         if (!buffer_dirty(path[level].bp_sib_bh))
764                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
765
766         unlock_buffer(path[level].bp_bh);
767         unlock_buffer(path[level].bp_sib_bh);
768
769         path[level + 1].bp_index++;
770         nilfs_btree_promote_key(btree, path, level + 1,
771                                 nilfs_btree_node_get_key(right, 0));
772         path[level + 1].bp_index--;
773
774         if (move) {
775                 brelse(path[level].bp_bh);
776                 path[level].bp_bh = path[level].bp_sib_bh;
777                 path[level].bp_sib_bh = NULL;
778                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
779                 path[level + 1].bp_index++;
780         } else {
781                 brelse(path[level].bp_sib_bh);
782                 path[level].bp_sib_bh = NULL;
783         }
784
785         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
786 }
787
788 static void nilfs_btree_split(struct nilfs_btree *btree,
789                               struct nilfs_btree_path *path,
790                               int level, __u64 *keyp, __u64 *ptrp)
791 {
792         struct nilfs_btree_node *node, *right;
793         __u64 newkey;
794         __u64 newptr;
795         int nchildren, n, move;
796
797         lock_buffer(path[level].bp_bh);
798         lock_buffer(path[level].bp_sib_bh);
799
800         node = nilfs_btree_get_nonroot_node(path, level);
801         right = nilfs_btree_get_sib_node(path, level);
802         nchildren = nilfs_btree_node_get_nchildren(node);
803         move = 0;
804
805         n = (nchildren + 1) / 2;
806         if (n > nchildren - path[level].bp_index) {
807                 n--;
808                 move = 1;
809         }
810
811         nilfs_btree_node_move_right(btree, node, right, n);
812
813         if (!buffer_dirty(path[level].bp_bh))
814                 nilfs_btnode_mark_dirty(path[level].bp_bh);
815         if (!buffer_dirty(path[level].bp_sib_bh))
816                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
817
818         unlock_buffer(path[level].bp_bh);
819         unlock_buffer(path[level].bp_sib_bh);
820
821         newkey = nilfs_btree_node_get_key(right, 0);
822         newptr = path[level].bp_newreq.bpr_ptr;
823
824         if (move) {
825                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
826                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
827                                         path[level].bp_index);
828
829                 *keyp = nilfs_btree_node_get_key(right, 0);
830                 *ptrp = path[level].bp_newreq.bpr_ptr;
831
832                 brelse(path[level].bp_bh);
833                 path[level].bp_bh = path[level].bp_sib_bh;
834                 path[level].bp_sib_bh = NULL;
835         } else {
836                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
837
838                 *keyp = nilfs_btree_node_get_key(right, 0);
839                 *ptrp = path[level].bp_newreq.bpr_ptr;
840
841                 brelse(path[level].bp_sib_bh);
842                 path[level].bp_sib_bh = NULL;
843         }
844
845         path[level + 1].bp_index++;
846 }
847
848 static void nilfs_btree_grow(struct nilfs_btree *btree,
849                              struct nilfs_btree_path *path,
850                              int level, __u64 *keyp, __u64 *ptrp)
851 {
852         struct nilfs_btree_node *root, *child;
853         int n;
854
855         lock_buffer(path[level].bp_sib_bh);
856
857         root = nilfs_btree_get_root(btree);
858         child = nilfs_btree_get_sib_node(path, level);
859
860         n = nilfs_btree_node_get_nchildren(root);
861
862         nilfs_btree_node_move_right(btree, root, child, n);
863         nilfs_btree_node_set_level(root, level + 1);
864
865         if (!buffer_dirty(path[level].bp_sib_bh))
866                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
867
868         unlock_buffer(path[level].bp_sib_bh);
869
870         path[level].bp_bh = path[level].bp_sib_bh;
871         path[level].bp_sib_bh = NULL;
872
873         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
874
875         *keyp = nilfs_btree_node_get_key(child, 0);
876         *ptrp = path[level].bp_newreq.bpr_ptr;
877 }
878
879 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
880                                    const struct nilfs_btree_path *path)
881 {
882         struct nilfs_btree_node *node;
883         int level;
884
885         if (path == NULL)
886                 return NILFS_BMAP_INVALID_PTR;
887
888         /* left sibling */
889         level = NILFS_BTREE_LEVEL_NODE_MIN;
890         if (path[level].bp_index > 0) {
891                 node = nilfs_btree_get_node(btree, path, level);
892                 return nilfs_btree_node_get_ptr(btree, node,
893                                                 path[level].bp_index - 1);
894         }
895
896         /* parent */
897         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
898         if (level <= nilfs_btree_height(btree) - 1) {
899                 node = nilfs_btree_get_node(btree, path, level);
900                 return nilfs_btree_node_get_ptr(btree, node,
901                                                 path[level].bp_index);
902         }
903
904         return NILFS_BMAP_INVALID_PTR;
905 }
906
907 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
908                                        const struct nilfs_btree_path *path,
909                                        __u64 key)
910 {
911         __u64 ptr;
912
913         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
914         if (ptr != NILFS_BMAP_INVALID_PTR)
915                 /* sequential access */
916                 return ptr;
917         else {
918                 ptr = nilfs_btree_find_near(btree, path);
919                 if (ptr != NILFS_BMAP_INVALID_PTR)
920                         /* near */
921                         return ptr;
922         }
923         /* block group */
924         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
925 }
926
927 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
928                                      __u64 ptr)
929 {
930         btree->bt_bmap.b_last_allocated_key = key;
931         btree->bt_bmap.b_last_allocated_ptr = ptr;
932 }
933
934 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
935                                       struct nilfs_btree_path *path,
936                                       int *levelp, __u64 key, __u64 ptr,
937                                       struct nilfs_bmap_stats *stats)
938 {
939         struct buffer_head *bh;
940         struct nilfs_btree_node *node, *parent, *sib;
941         __u64 sibptr;
942         int pindex, level, ret;
943
944         stats->bs_nblocks = 0;
945         level = NILFS_BTREE_LEVEL_DATA;
946
947         /* allocate a new ptr for data block */
948         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
949                 path[level].bp_newreq.bpr_ptr =
950                         nilfs_btree_find_target_v(btree, path, key);
951
952         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
953                                            &path[level].bp_newreq);
954         if (ret < 0)
955                 goto err_out_data;
956
957         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
958              level < nilfs_btree_height(btree) - 1;
959              level++) {
960                 node = nilfs_btree_get_nonroot_node(path, level);
961                 if (nilfs_btree_node_get_nchildren(node) <
962                     nilfs_btree_node_nchildren_max(node, btree)) {
963                         path[level].bp_op = nilfs_btree_do_insert;
964                         stats->bs_nblocks++;
965                         goto out;
966                 }
967
968                 parent = nilfs_btree_get_node(btree, path, level + 1);
969                 pindex = path[level + 1].bp_index;
970
971                 /* left sibling */
972                 if (pindex > 0) {
973                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
974                                                           pindex - 1);
975                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
976                         if (ret < 0)
977                                 goto err_out_child_node;
978                         sib = (struct nilfs_btree_node *)bh->b_data;
979                         if (nilfs_btree_node_get_nchildren(sib) <
980                             nilfs_btree_node_nchildren_max(sib, btree)) {
981                                 path[level].bp_sib_bh = bh;
982                                 path[level].bp_op = nilfs_btree_carry_left;
983                                 stats->bs_nblocks++;
984                                 goto out;
985                         } else
986                                 brelse(bh);
987                 }
988
989                 /* right sibling */
990                 if (pindex <
991                     nilfs_btree_node_get_nchildren(parent) - 1) {
992                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
993                                                           pindex + 1);
994                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
995                         if (ret < 0)
996                                 goto err_out_child_node;
997                         sib = (struct nilfs_btree_node *)bh->b_data;
998                         if (nilfs_btree_node_get_nchildren(sib) <
999                             nilfs_btree_node_nchildren_max(sib, btree)) {
1000                                 path[level].bp_sib_bh = bh;
1001                                 path[level].bp_op = nilfs_btree_carry_right;
1002                                 stats->bs_nblocks++;
1003                                 goto out;
1004                         } else
1005                                 brelse(bh);
1006                 }
1007
1008                 /* split */
1009                 path[level].bp_newreq.bpr_ptr =
1010                         path[level - 1].bp_newreq.bpr_ptr + 1;
1011                 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1012                                                    &path[level].bp_newreq);
1013                 if (ret < 0)
1014                         goto err_out_child_node;
1015                 ret = nilfs_btree_get_new_block(btree,
1016                                                 path[level].bp_newreq.bpr_ptr,
1017                                                 &bh);
1018                 if (ret < 0)
1019                         goto err_out_curr_node;
1020
1021                 stats->bs_nblocks++;
1022
1023                 lock_buffer(bh);
1024                 nilfs_btree_node_init(btree,
1025                                       (struct nilfs_btree_node *)bh->b_data,
1026                                       0, level, 0, NULL, NULL);
1027                 unlock_buffer(bh);
1028                 path[level].bp_sib_bh = bh;
1029                 path[level].bp_op = nilfs_btree_split;
1030         }
1031
1032         /* root */
1033         node = nilfs_btree_get_root(btree);
1034         if (nilfs_btree_node_get_nchildren(node) <
1035             nilfs_btree_node_nchildren_max(node, btree)) {
1036                 path[level].bp_op = nilfs_btree_do_insert;
1037                 stats->bs_nblocks++;
1038                 goto out;
1039         }
1040
1041         /* grow */
1042         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1043         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1044                                            &path[level].bp_newreq);
1045         if (ret < 0)
1046                 goto err_out_child_node;
1047         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1048                                         &bh);
1049         if (ret < 0)
1050                 goto err_out_curr_node;
1051
1052         lock_buffer(bh);
1053         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1054                               0, level, 0, NULL, NULL);
1055         unlock_buffer(bh);
1056         path[level].bp_sib_bh = bh;
1057         path[level].bp_op = nilfs_btree_grow;
1058
1059         level++;
1060         path[level].bp_op = nilfs_btree_do_insert;
1061
1062         /* a newly-created node block and a data block are added */
1063         stats->bs_nblocks += 2;
1064
1065         /* success */
1066  out:
1067         *levelp = level;
1068         return ret;
1069
1070         /* error */
1071  err_out_curr_node:
1072         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
1073  err_out_child_node:
1074         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1075                 nilfs_btnode_delete(path[level].bp_sib_bh);
1076                 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1077                                            &path[level].bp_newreq);
1078
1079         }
1080
1081         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
1082  err_out_data:
1083         *levelp = level;
1084         stats->bs_nblocks = 0;
1085         return ret;
1086 }
1087
1088 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1089                                       struct nilfs_btree_path *path,
1090                                       int maxlevel, __u64 key, __u64 ptr)
1091 {
1092         int level;
1093
1094         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1095         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1096         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
1097                 nilfs_btree_set_target_v(btree, key, ptr);
1098
1099         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1100                 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1101                                             &path[level - 1].bp_newreq);
1102                 path[level].bp_op(btree, path, level, &key, &ptr);
1103         }
1104
1105         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1106                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1107 }
1108
1109 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1110 {
1111         struct nilfs_btree *btree;
1112         struct nilfs_btree_path *path;
1113         struct nilfs_bmap_stats stats;
1114         int level, ret;
1115
1116         btree = (struct nilfs_btree *)bmap;
1117         path = nilfs_btree_alloc_path();
1118         if (path == NULL)
1119                 return -ENOMEM;
1120         nilfs_btree_init_path(path);
1121
1122         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1123                                     NILFS_BTREE_LEVEL_NODE_MIN);
1124         if (ret != -ENOENT) {
1125                 if (ret == 0)
1126                         ret = -EEXIST;
1127                 goto out;
1128         }
1129
1130         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1131         if (ret < 0)
1132                 goto out;
1133         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1134         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1135
1136  out:
1137         nilfs_btree_release_path(path);
1138         nilfs_btree_free_path(path);
1139         return ret;
1140 }
1141
1142 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1143                                   struct nilfs_btree_path *path,
1144                                   int level, __u64 *keyp, __u64 *ptrp)
1145 {
1146         struct nilfs_btree_node *node;
1147
1148         if (level < nilfs_btree_height(btree) - 1) {
1149                 lock_buffer(path[level].bp_bh);
1150                 node = nilfs_btree_get_nonroot_node(path, level);
1151                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1152                                         path[level].bp_index);
1153                 if (!buffer_dirty(path[level].bp_bh))
1154                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1155                 unlock_buffer(path[level].bp_bh);
1156                 if (path[level].bp_index == 0)
1157                         nilfs_btree_promote_key(btree, path, level + 1,
1158                                 nilfs_btree_node_get_key(node, 0));
1159         } else {
1160                 node = nilfs_btree_get_root(btree);
1161                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1162                                         path[level].bp_index);
1163         }
1164 }
1165
1166 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1167                                     struct nilfs_btree_path *path,
1168                                     int level, __u64 *keyp, __u64 *ptrp)
1169 {
1170         struct nilfs_btree_node *node, *left;
1171         int nchildren, lnchildren, n;
1172
1173         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1174
1175         lock_buffer(path[level].bp_bh);
1176         lock_buffer(path[level].bp_sib_bh);
1177
1178         node = nilfs_btree_get_nonroot_node(path, level);
1179         left = nilfs_btree_get_sib_node(path, level);
1180         nchildren = nilfs_btree_node_get_nchildren(node);
1181         lnchildren = nilfs_btree_node_get_nchildren(left);
1182
1183         n = (nchildren + lnchildren) / 2 - nchildren;
1184
1185         nilfs_btree_node_move_right(btree, left, node, n);
1186
1187         if (!buffer_dirty(path[level].bp_bh))
1188                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1189         if (!buffer_dirty(path[level].bp_sib_bh))
1190                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1191
1192         unlock_buffer(path[level].bp_bh);
1193         unlock_buffer(path[level].bp_sib_bh);
1194
1195         nilfs_btree_promote_key(btree, path, level + 1,
1196                                 nilfs_btree_node_get_key(node, 0));
1197
1198         brelse(path[level].bp_sib_bh);
1199         path[level].bp_sib_bh = NULL;
1200         path[level].bp_index += n;
1201 }
1202
1203 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1204                                      struct nilfs_btree_path *path,
1205                                      int level, __u64 *keyp, __u64 *ptrp)
1206 {
1207         struct nilfs_btree_node *node, *right;
1208         int nchildren, rnchildren, n;
1209
1210         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1211
1212         lock_buffer(path[level].bp_bh);
1213         lock_buffer(path[level].bp_sib_bh);
1214
1215         node = nilfs_btree_get_nonroot_node(path, level);
1216         right = nilfs_btree_get_sib_node(path, level);
1217         nchildren = nilfs_btree_node_get_nchildren(node);
1218         rnchildren = nilfs_btree_node_get_nchildren(right);
1219
1220         n = (nchildren + rnchildren) / 2 - nchildren;
1221
1222         nilfs_btree_node_move_left(btree, node, right, n);
1223
1224         if (!buffer_dirty(path[level].bp_bh))
1225                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1226         if (!buffer_dirty(path[level].bp_sib_bh))
1227                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1228
1229         unlock_buffer(path[level].bp_bh);
1230         unlock_buffer(path[level].bp_sib_bh);
1231
1232         path[level + 1].bp_index++;
1233         nilfs_btree_promote_key(btree, path, level + 1,
1234                                 nilfs_btree_node_get_key(right, 0));
1235         path[level + 1].bp_index--;
1236
1237         brelse(path[level].bp_sib_bh);
1238         path[level].bp_sib_bh = NULL;
1239 }
1240
1241 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1242                                     struct nilfs_btree_path *path,
1243                                     int level, __u64 *keyp, __u64 *ptrp)
1244 {
1245         struct nilfs_btree_node *node, *left;
1246         int n;
1247
1248         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1249
1250         lock_buffer(path[level].bp_bh);
1251         lock_buffer(path[level].bp_sib_bh);
1252
1253         node = nilfs_btree_get_nonroot_node(path, level);
1254         left = nilfs_btree_get_sib_node(path, level);
1255
1256         n = nilfs_btree_node_get_nchildren(node);
1257
1258         nilfs_btree_node_move_left(btree, left, node, n);
1259
1260         if (!buffer_dirty(path[level].bp_sib_bh))
1261                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1262
1263         unlock_buffer(path[level].bp_bh);
1264         unlock_buffer(path[level].bp_sib_bh);
1265
1266         nilfs_btnode_delete(path[level].bp_bh);
1267         path[level].bp_bh = path[level].bp_sib_bh;
1268         path[level].bp_sib_bh = NULL;
1269         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1270 }
1271
1272 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1273                                      struct nilfs_btree_path *path,
1274                                      int level, __u64 *keyp, __u64 *ptrp)
1275 {
1276         struct nilfs_btree_node *node, *right;
1277         int n;
1278
1279         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1280
1281         lock_buffer(path[level].bp_bh);
1282         lock_buffer(path[level].bp_sib_bh);
1283
1284         node = nilfs_btree_get_nonroot_node(path, level);
1285         right = nilfs_btree_get_sib_node(path, level);
1286
1287         n = nilfs_btree_node_get_nchildren(right);
1288
1289         nilfs_btree_node_move_left(btree, node, right, n);
1290
1291         if (!buffer_dirty(path[level].bp_bh))
1292                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1293
1294         unlock_buffer(path[level].bp_bh);
1295         unlock_buffer(path[level].bp_sib_bh);
1296
1297         nilfs_btnode_delete(path[level].bp_sib_bh);
1298         path[level].bp_sib_bh = NULL;
1299         path[level + 1].bp_index++;
1300 }
1301
1302 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1303                                struct nilfs_btree_path *path,
1304                                int level, __u64 *keyp, __u64 *ptrp)
1305 {
1306         struct nilfs_btree_node *root, *child;
1307         int n;
1308
1309         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1310
1311         lock_buffer(path[level].bp_bh);
1312         root = nilfs_btree_get_root(btree);
1313         child = nilfs_btree_get_nonroot_node(path, level);
1314
1315         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1316         nilfs_btree_node_set_level(root, level);
1317         n = nilfs_btree_node_get_nchildren(child);
1318         nilfs_btree_node_move_left(btree, root, child, n);
1319         unlock_buffer(path[level].bp_bh);
1320
1321         nilfs_btnode_delete(path[level].bp_bh);
1322         path[level].bp_bh = NULL;
1323 }
1324
1325
1326 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1327                                       struct nilfs_btree_path *path,
1328                                       int *levelp,
1329                                       struct nilfs_bmap_stats *stats)
1330 {
1331         struct buffer_head *bh;
1332         struct nilfs_btree_node *node, *parent, *sib;
1333         __u64 sibptr;
1334         int pindex, level, ret;
1335
1336         ret = 0;
1337         stats->bs_nblocks = 0;
1338         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1339              level < nilfs_btree_height(btree) - 1;
1340              level++) {
1341                 node = nilfs_btree_get_nonroot_node(path, level);
1342                 path[level].bp_oldreq.bpr_ptr =
1343                         nilfs_btree_node_get_ptr(btree, node,
1344                                                  path[level].bp_index);
1345                 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1346                                                  &path[level].bp_oldreq);
1347                 if (ret < 0)
1348                         goto err_out_child_node;
1349
1350                 if (nilfs_btree_node_get_nchildren(node) >
1351                     nilfs_btree_node_nchildren_min(node, btree)) {
1352                         path[level].bp_op = nilfs_btree_do_delete;
1353                         stats->bs_nblocks++;
1354                         goto out;
1355                 }
1356
1357                 parent = nilfs_btree_get_node(btree, path, level + 1);
1358                 pindex = path[level + 1].bp_index;
1359
1360                 if (pindex > 0) {
1361                         /* left sibling */
1362                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1363                                                           pindex - 1);
1364                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1365                         if (ret < 0)
1366                                 goto err_out_curr_node;
1367                         sib = (struct nilfs_btree_node *)bh->b_data;
1368                         if (nilfs_btree_node_get_nchildren(sib) >
1369                             nilfs_btree_node_nchildren_min(sib, btree)) {
1370                                 path[level].bp_sib_bh = bh;
1371                                 path[level].bp_op = nilfs_btree_borrow_left;
1372                                 stats->bs_nblocks++;
1373                                 goto out;
1374                         } else {
1375                                 path[level].bp_sib_bh = bh;
1376                                 path[level].bp_op = nilfs_btree_concat_left;
1377                                 stats->bs_nblocks++;
1378                                 /* continue; */
1379                         }
1380                 } else if (pindex <
1381                            nilfs_btree_node_get_nchildren(parent) - 1) {
1382                         /* right sibling */
1383                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1384                                                           pindex + 1);
1385                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1386                         if (ret < 0)
1387                                 goto err_out_curr_node;
1388                         sib = (struct nilfs_btree_node *)bh->b_data;
1389                         if (nilfs_btree_node_get_nchildren(sib) >
1390                             nilfs_btree_node_nchildren_min(sib, btree)) {
1391                                 path[level].bp_sib_bh = bh;
1392                                 path[level].bp_op = nilfs_btree_borrow_right;
1393                                 stats->bs_nblocks++;
1394                                 goto out;
1395                         } else {
1396                                 path[level].bp_sib_bh = bh;
1397                                 path[level].bp_op = nilfs_btree_concat_right;
1398                                 stats->bs_nblocks++;
1399                                 /* continue; */
1400                         }
1401                 } else {
1402                         /* no siblings */
1403                         /* the only child of the root node */
1404                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1405                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1406                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1407                                 path[level].bp_op = nilfs_btree_shrink;
1408                                 stats->bs_nblocks += 2;
1409                         } else {
1410                                 path[level].bp_op = nilfs_btree_do_delete;
1411                                 stats->bs_nblocks++;
1412                         }
1413
1414                         goto out;
1415
1416                 }
1417         }
1418
1419         node = nilfs_btree_get_root(btree);
1420         path[level].bp_oldreq.bpr_ptr =
1421                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1422
1423         ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1424                                          &path[level].bp_oldreq);
1425         if (ret < 0)
1426                 goto err_out_child_node;
1427
1428         /* child of the root node is deleted */
1429         path[level].bp_op = nilfs_btree_do_delete;
1430         stats->bs_nblocks++;
1431
1432         /* success */
1433  out:
1434         *levelp = level;
1435         return ret;
1436
1437         /* error */
1438  err_out_curr_node:
1439         nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq);
1440  err_out_child_node:
1441         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1442                 brelse(path[level].bp_sib_bh);
1443                 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1444                                          &path[level].bp_oldreq);
1445         }
1446         *levelp = level;
1447         stats->bs_nblocks = 0;
1448         return ret;
1449 }
1450
1451 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1452                                       struct nilfs_btree_path *path,
1453                                       int maxlevel)
1454 {
1455         int level;
1456
1457         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1458                 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1459                                           &path[level].bp_oldreq);
1460                 path[level].bp_op(btree, path, level, NULL, NULL);
1461         }
1462
1463         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1464                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1465 }
1466
1467 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1468
1469 {
1470         struct nilfs_btree *btree;
1471         struct nilfs_btree_path *path;
1472         struct nilfs_bmap_stats stats;
1473         int level, ret;
1474
1475         btree = (struct nilfs_btree *)bmap;
1476         path = nilfs_btree_alloc_path();
1477         if (path == NULL)
1478                 return -ENOMEM;
1479         nilfs_btree_init_path(path);
1480         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1481                                     NILFS_BTREE_LEVEL_NODE_MIN);
1482         if (ret < 0)
1483                 goto out;
1484
1485         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1486         if (ret < 0)
1487                 goto out;
1488         nilfs_btree_commit_delete(btree, path, level);
1489         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1490
1491 out:
1492         nilfs_btree_release_path(path);
1493         nilfs_btree_free_path(path);
1494         return ret;
1495 }
1496
1497 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1498 {
1499         struct nilfs_btree *btree;
1500         struct nilfs_btree_path *path;
1501         int ret;
1502
1503         btree = (struct nilfs_btree *)bmap;
1504         path = nilfs_btree_alloc_path();
1505         if (path == NULL)
1506                 return -ENOMEM;
1507         nilfs_btree_init_path(path);
1508
1509         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1510
1511         nilfs_btree_release_path(path);
1512         nilfs_btree_free_path(path);
1513
1514         return ret;
1515 }
1516
1517 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1518 {
1519         struct buffer_head *bh;
1520         struct nilfs_btree *btree;
1521         struct nilfs_btree_node *root, *node;
1522         __u64 maxkey, nextmaxkey;
1523         __u64 ptr;
1524         int nchildren, ret;
1525
1526         btree = (struct nilfs_btree *)bmap;
1527         root = nilfs_btree_get_root(btree);
1528         switch (nilfs_btree_height(btree)) {
1529         case 2:
1530                 bh = NULL;
1531                 node = root;
1532                 break;
1533         case 3:
1534                 nchildren = nilfs_btree_node_get_nchildren(root);
1535                 if (nchildren > 1)
1536                         return 0;
1537                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1538                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1539                 if (ret < 0)
1540                         return ret;
1541                 node = (struct nilfs_btree_node *)bh->b_data;
1542                 break;
1543         default:
1544                 return 0;
1545         }
1546
1547         nchildren = nilfs_btree_node_get_nchildren(node);
1548         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1549         nextmaxkey = (nchildren > 1) ?
1550                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1551         if (bh != NULL)
1552                 brelse(bh);
1553
1554         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1555 }
1556
1557 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1558                                    __u64 *keys, __u64 *ptrs, int nitems)
1559 {
1560         struct buffer_head *bh;
1561         struct nilfs_btree *btree;
1562         struct nilfs_btree_node *node, *root;
1563         __le64 *dkeys;
1564         __le64 *dptrs;
1565         __u64 ptr;
1566         int nchildren, i, ret;
1567
1568         btree = (struct nilfs_btree *)bmap;
1569         root = nilfs_btree_get_root(btree);
1570         switch (nilfs_btree_height(btree)) {
1571         case 2:
1572                 bh = NULL;
1573                 node = root;
1574                 break;
1575         case 3:
1576                 nchildren = nilfs_btree_node_get_nchildren(root);
1577                 WARN_ON(nchildren > 1);
1578                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1579                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1580                 if (ret < 0)
1581                         return ret;
1582                 node = (struct nilfs_btree_node *)bh->b_data;
1583                 break;
1584         default:
1585                 node = NULL;
1586                 return -EINVAL;
1587         }
1588
1589         nchildren = nilfs_btree_node_get_nchildren(node);
1590         if (nchildren < nitems)
1591                 nitems = nchildren;
1592         dkeys = nilfs_btree_node_dkeys(node);
1593         dptrs = nilfs_btree_node_dptrs(node, btree);
1594         for (i = 0; i < nitems; i++) {
1595                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1596                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1597         }
1598
1599         if (bh != NULL)
1600                 brelse(bh);
1601
1602         return nitems;
1603 }
1604
1605 static int
1606 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1607                                        union nilfs_bmap_ptr_req *dreq,
1608                                        union nilfs_bmap_ptr_req *nreq,
1609                                        struct buffer_head **bhp,
1610                                        struct nilfs_bmap_stats *stats)
1611 {
1612         struct buffer_head *bh;
1613         struct nilfs_btree *btree;
1614         int ret;
1615
1616         btree = (struct nilfs_btree *)bmap;
1617         stats->bs_nblocks = 0;
1618
1619         /* for data */
1620         /* cannot find near ptr */
1621         if (NILFS_BMAP_USE_VBN(bmap))
1622                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1623
1624         ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq);
1625         if (ret < 0)
1626                 return ret;
1627
1628         *bhp = NULL;
1629         stats->bs_nblocks++;
1630         if (nreq != NULL) {
1631                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1632                 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq);
1633                 if (ret < 0)
1634                         goto err_out_dreq;
1635
1636                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1637                 if (ret < 0)
1638                         goto err_out_nreq;
1639
1640                 *bhp = bh;
1641                 stats->bs_nblocks++;
1642         }
1643
1644         /* success */
1645         return 0;
1646
1647         /* error */
1648  err_out_nreq:
1649         nilfs_bmap_abort_alloc_ptr(bmap, nreq);
1650  err_out_dreq:
1651         nilfs_bmap_abort_alloc_ptr(bmap, dreq);
1652         stats->bs_nblocks = 0;
1653         return ret;
1654
1655 }
1656
1657 static void
1658 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1659                                       __u64 key, __u64 ptr,
1660                                       const __u64 *keys, const __u64 *ptrs,
1661                                       int n,
1662                                       union nilfs_bmap_ptr_req *dreq,
1663                                       union nilfs_bmap_ptr_req *nreq,
1664                                       struct buffer_head *bh)
1665 {
1666         struct nilfs_btree *btree;
1667         struct nilfs_btree_node *node;
1668         __u64 tmpptr;
1669
1670         /* free resources */
1671         if (bmap->b_ops->bop_clear != NULL)
1672                 bmap->b_ops->bop_clear(bmap);
1673
1674         /* ptr must be a pointer to a buffer head. */
1675         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1676
1677         /* convert and insert */
1678         btree = (struct nilfs_btree *)bmap;
1679         nilfs_btree_init(bmap);
1680         if (nreq != NULL) {
1681                 nilfs_bmap_commit_alloc_ptr(bmap, dreq);
1682                 nilfs_bmap_commit_alloc_ptr(bmap, nreq);
1683
1684                 /* create child node at level 1 */
1685                 lock_buffer(bh);
1686                 node = (struct nilfs_btree_node *)bh->b_data;
1687                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1688                 nilfs_btree_node_insert(btree, node,
1689                                         key, dreq->bpr_ptr, n);
1690                 if (!buffer_dirty(bh))
1691                         nilfs_btnode_mark_dirty(bh);
1692                 if (!nilfs_bmap_dirty(bmap))
1693                         nilfs_bmap_set_dirty(bmap);
1694
1695                 unlock_buffer(bh);
1696                 brelse(bh);
1697
1698                 /* create root node at level 2 */
1699                 node = nilfs_btree_get_root(btree);
1700                 tmpptr = nreq->bpr_ptr;
1701                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1702                                       2, 1, &keys[0], &tmpptr);
1703         } else {
1704                 nilfs_bmap_commit_alloc_ptr(bmap, dreq);
1705
1706                 /* create root node at level 1 */
1707                 node = nilfs_btree_get_root(btree);
1708                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1709                                       1, n, keys, ptrs);
1710                 nilfs_btree_node_insert(btree, node,
1711                                         key, dreq->bpr_ptr, n);
1712                 if (!nilfs_bmap_dirty(bmap))
1713                         nilfs_bmap_set_dirty(bmap);
1714         }
1715
1716         if (NILFS_BMAP_USE_VBN(bmap))
1717                 nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1718 }
1719
1720 /**
1721  * nilfs_btree_convert_and_insert -
1722  * @bmap:
1723  * @key:
1724  * @ptr:
1725  * @keys:
1726  * @ptrs:
1727  * @n:
1728  */
1729 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1730                                    __u64 key, __u64 ptr,
1731                                    const __u64 *keys, const __u64 *ptrs, int n)
1732 {
1733         struct buffer_head *bh;
1734         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1735         struct nilfs_bmap_stats stats;
1736         int ret;
1737
1738         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1739                 di = &dreq;
1740                 ni = NULL;
1741         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1742                            1 << bmap->b_inode->i_blkbits)) {
1743                 di = &dreq;
1744                 ni = &nreq;
1745         } else {
1746                 di = NULL;
1747                 ni = NULL;
1748                 BUG();
1749         }
1750
1751         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1752                                                      &stats);
1753         if (ret < 0)
1754                 return ret;
1755         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1756                                               di, ni, bh);
1757         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1758         return 0;
1759 }
1760
1761 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1762                                    struct nilfs_btree_path *path,
1763                                    int level,
1764                                    struct buffer_head *bh)
1765 {
1766         while ((++level < nilfs_btree_height(btree) - 1) &&
1767                !buffer_dirty(path[level].bp_bh))
1768                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1769
1770         return 0;
1771 }
1772
1773 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1774                                         struct nilfs_btree_path *path,
1775                                         int level)
1776 {
1777         struct nilfs_btree_node *parent;
1778         int ret;
1779
1780         parent = nilfs_btree_get_node(btree, path, level + 1);
1781         path[level].bp_oldreq.bpr_ptr =
1782                 nilfs_btree_node_get_ptr(btree, parent,
1783                                          path[level + 1].bp_index);
1784         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1785         ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap,
1786                                           &path[level].bp_oldreq,
1787                                           &path[level].bp_newreq);
1788         if (ret < 0)
1789                 return ret;
1790
1791         if (buffer_nilfs_node(path[level].bp_bh)) {
1792                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1793                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1794                 path[level].bp_ctxt.bh = path[level].bp_bh;
1795                 ret = nilfs_btnode_prepare_change_key(
1796                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1797                         &path[level].bp_ctxt);
1798                 if (ret < 0) {
1799                         nilfs_bmap_abort_update_v(&btree->bt_bmap,
1800                                                   &path[level].bp_oldreq,
1801                                                   &path[level].bp_newreq);
1802                         return ret;
1803                 }
1804         }
1805
1806         return 0;
1807 }
1808
1809 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1810                                         struct nilfs_btree_path *path,
1811                                         int level)
1812 {
1813         struct nilfs_btree_node *parent;
1814
1815         nilfs_bmap_commit_update_v(&btree->bt_bmap,
1816                                    &path[level].bp_oldreq,
1817                                    &path[level].bp_newreq);
1818
1819         if (buffer_nilfs_node(path[level].bp_bh)) {
1820                 nilfs_btnode_commit_change_key(
1821                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1822                         &path[level].bp_ctxt);
1823                 path[level].bp_bh = path[level].bp_ctxt.bh;
1824         }
1825         set_buffer_nilfs_volatile(path[level].bp_bh);
1826
1827         parent = nilfs_btree_get_node(btree, path, level + 1);
1828         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1829                                  path[level].bp_newreq.bpr_ptr);
1830 }
1831
1832 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1833                                        struct nilfs_btree_path *path,
1834                                        int level)
1835 {
1836         nilfs_bmap_abort_update_v(&btree->bt_bmap,
1837                                   &path[level].bp_oldreq,
1838                                   &path[level].bp_newreq);
1839         if (buffer_nilfs_node(path[level].bp_bh))
1840                 nilfs_btnode_abort_change_key(
1841                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1842                         &path[level].bp_ctxt);
1843 }
1844
1845 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1846                                            struct nilfs_btree_path *path,
1847                                            int minlevel,
1848                                            int *maxlevelp)
1849 {
1850         int level, ret;
1851
1852         level = minlevel;
1853         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1854                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1855                 if (ret < 0)
1856                         return ret;
1857         }
1858         while ((++level < nilfs_btree_height(btree) - 1) &&
1859                !buffer_dirty(path[level].bp_bh)) {
1860
1861                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1862                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1863                 if (ret < 0)
1864                         goto out;
1865         }
1866
1867         /* success */
1868         *maxlevelp = level - 1;
1869         return 0;
1870
1871         /* error */
1872  out:
1873         while (--level > minlevel)
1874                 nilfs_btree_abort_update_v(btree, path, level);
1875         if (!buffer_nilfs_volatile(path[level].bp_bh))
1876                 nilfs_btree_abort_update_v(btree, path, level);
1877         return ret;
1878 }
1879
1880 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1881                                            struct nilfs_btree_path *path,
1882                                            int minlevel,
1883                                            int maxlevel,
1884                                            struct buffer_head *bh)
1885 {
1886         int level;
1887
1888         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1889                 nilfs_btree_commit_update_v(btree, path, minlevel);
1890
1891         for (level = minlevel + 1; level <= maxlevel; level++)
1892                 nilfs_btree_commit_update_v(btree, path, level);
1893 }
1894
1895 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1896                                    struct nilfs_btree_path *path,
1897                                    int level,
1898                                    struct buffer_head *bh)
1899 {
1900         int maxlevel, ret;
1901         struct nilfs_btree_node *parent;
1902         __u64 ptr;
1903
1904         get_bh(bh);
1905         path[level].bp_bh = bh;
1906         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1907         if (ret < 0)
1908                 goto out;
1909
1910         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1911                 parent = nilfs_btree_get_node(btree, path, level + 1);
1912                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1913                                                path[level + 1].bp_index);
1914                 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1915                 if (ret < 0)
1916                         goto out;
1917         }
1918
1919         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1920
1921  out:
1922         brelse(path[level].bp_bh);
1923         path[level].bp_bh = NULL;
1924         return ret;
1925 }
1926
1927 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1928                                  struct buffer_head *bh)
1929 {
1930         struct nilfs_btree *btree;
1931         struct nilfs_btree_path *path;
1932         struct nilfs_btree_node *node;
1933         __u64 key;
1934         int level, ret;
1935
1936         WARN_ON(!buffer_dirty(bh));
1937
1938         btree = (struct nilfs_btree *)bmap;
1939         path = nilfs_btree_alloc_path();
1940         if (path == NULL)
1941                 return -ENOMEM;
1942         nilfs_btree_init_path(path);
1943
1944         if (buffer_nilfs_node(bh)) {
1945                 node = (struct nilfs_btree_node *)bh->b_data;
1946                 key = nilfs_btree_node_get_key(node, 0);
1947                 level = nilfs_btree_node_get_level(node);
1948         } else {
1949                 key = nilfs_bmap_data_get_key(bmap, bh);
1950                 level = NILFS_BTREE_LEVEL_DATA;
1951         }
1952
1953         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1954         if (ret < 0) {
1955                 if (unlikely(ret == -ENOENT))
1956                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1957                                __func__, (unsigned long long)key, level);
1958                 goto out;
1959         }
1960
1961         ret = NILFS_BMAP_USE_VBN(bmap) ?
1962                 nilfs_btree_propagate_v(btree, path, level, bh) :
1963                 nilfs_btree_propagate_p(btree, path, level, bh);
1964
1965  out:
1966         nilfs_btree_release_path(path);
1967         nilfs_btree_free_path(path);
1968
1969         return ret;
1970 }
1971
1972 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1973                                     struct buffer_head *bh)
1974 {
1975         return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
1976 }
1977
1978 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1979                                          struct list_head *lists,
1980                                          struct buffer_head *bh)
1981 {
1982         struct list_head *head;
1983         struct buffer_head *cbh;
1984         struct nilfs_btree_node *node, *cnode;
1985         __u64 key, ckey;
1986         int level;
1987
1988         get_bh(bh);
1989         node = (struct nilfs_btree_node *)bh->b_data;
1990         key = nilfs_btree_node_get_key(node, 0);
1991         level = nilfs_btree_node_get_level(node);
1992         list_for_each(head, &lists[level]) {
1993                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1994                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1995                 ckey = nilfs_btree_node_get_key(cnode, 0);
1996                 if (key < ckey)
1997                         break;
1998         }
1999         list_add_tail(&bh->b_assoc_buffers, head);
2000 }
2001
2002 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
2003                                              struct list_head *listp)
2004 {
2005         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
2006         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
2007         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2008         struct pagevec pvec;
2009         struct buffer_head *bh, *head;
2010         pgoff_t index = 0;
2011         int level, i;
2012
2013         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2014              level < NILFS_BTREE_LEVEL_MAX;
2015              level++)
2016                 INIT_LIST_HEAD(&lists[level]);
2017
2018         pagevec_init(&pvec, 0);
2019
2020         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2021                                   PAGEVEC_SIZE)) {
2022                 for (i = 0; i < pagevec_count(&pvec); i++) {
2023                         bh = head = page_buffers(pvec.pages[i]);
2024                         do {
2025                                 if (buffer_dirty(bh))
2026                                         nilfs_btree_add_dirty_buffer(btree,
2027                                                                      lists, bh);
2028                         } while ((bh = bh->b_this_page) != head);
2029                 }
2030                 pagevec_release(&pvec);
2031                 cond_resched();
2032         }
2033
2034         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2035              level < NILFS_BTREE_LEVEL_MAX;
2036              level++)
2037                 list_splice(&lists[level], listp->prev);
2038 }
2039
2040 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2041                                 struct nilfs_btree_path *path,
2042                                 int level,
2043                                 struct buffer_head **bh,
2044                                 sector_t blocknr,
2045                                 union nilfs_binfo *binfo)
2046 {
2047         struct nilfs_btree_node *parent;
2048         __u64 key;
2049         __u64 ptr;
2050         int ret;
2051
2052         parent = nilfs_btree_get_node(btree, path, level + 1);
2053         ptr = nilfs_btree_node_get_ptr(btree, parent,
2054                                        path[level + 1].bp_index);
2055         if (buffer_nilfs_node(*bh)) {
2056                 path[level].bp_ctxt.oldkey = ptr;
2057                 path[level].bp_ctxt.newkey = blocknr;
2058                 path[level].bp_ctxt.bh = *bh;
2059                 ret = nilfs_btnode_prepare_change_key(
2060                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2061                         &path[level].bp_ctxt);
2062                 if (ret < 0)
2063                         return ret;
2064                 nilfs_btnode_commit_change_key(
2065                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2066                         &path[level].bp_ctxt);
2067                 *bh = path[level].bp_ctxt.bh;
2068         }
2069
2070         nilfs_btree_node_set_ptr(btree, parent,
2071                                  path[level + 1].bp_index, blocknr);
2072
2073         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2074         /* on-disk format */
2075         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2076         binfo->bi_dat.bi_level = level;
2077
2078         return 0;
2079 }
2080
2081 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2082                                 struct nilfs_btree_path *path,
2083                                 int level,
2084                                 struct buffer_head **bh,
2085                                 sector_t blocknr,
2086                                 union nilfs_binfo *binfo)
2087 {
2088         struct nilfs_btree_node *parent;
2089         __u64 key;
2090         __u64 ptr;
2091         union nilfs_bmap_ptr_req req;
2092         int ret;
2093
2094         parent = nilfs_btree_get_node(btree, path, level + 1);
2095         ptr = nilfs_btree_node_get_ptr(btree, parent,
2096                                        path[level + 1].bp_index);
2097         req.bpr_ptr = ptr;
2098         ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
2099         if (unlikely(ret < 0))
2100                 return ret;
2101
2102         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2103         /* on-disk format */
2104         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2105         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2106
2107         return 0;
2108 }
2109
2110 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2111                               struct buffer_head **bh,
2112                               sector_t blocknr,
2113                               union nilfs_binfo *binfo)
2114 {
2115         struct nilfs_btree *btree;
2116         struct nilfs_btree_path *path;
2117         struct nilfs_btree_node *node;
2118         __u64 key;
2119         int level, ret;
2120
2121         btree = (struct nilfs_btree *)bmap;
2122         path = nilfs_btree_alloc_path();
2123         if (path == NULL)
2124                 return -ENOMEM;
2125         nilfs_btree_init_path(path);
2126
2127         if (buffer_nilfs_node(*bh)) {
2128                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2129                 key = nilfs_btree_node_get_key(node, 0);
2130                 level = nilfs_btree_node_get_level(node);
2131         } else {
2132                 key = nilfs_bmap_data_get_key(bmap, *bh);
2133                 level = NILFS_BTREE_LEVEL_DATA;
2134         }
2135
2136         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2137         if (ret < 0) {
2138                 WARN_ON(ret == -ENOENT);
2139                 goto out;
2140         }
2141
2142         ret = NILFS_BMAP_USE_VBN(bmap) ?
2143                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2144                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2145
2146  out:
2147         nilfs_btree_release_path(path);
2148         nilfs_btree_free_path(path);
2149
2150         return ret;
2151 }
2152
2153 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2154                                  struct buffer_head **bh,
2155                                  sector_t blocknr,
2156                                  union nilfs_binfo *binfo)
2157 {
2158         struct nilfs_btree *btree;
2159         struct nilfs_btree_node *node;
2160         __u64 key;
2161         int ret;
2162
2163         btree = (struct nilfs_btree *)bmap;
2164         ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2165         if (ret < 0)
2166                 return ret;
2167
2168         if (buffer_nilfs_node(*bh)) {
2169                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2170                 key = nilfs_btree_node_get_key(node, 0);
2171         } else
2172                 key = nilfs_bmap_data_get_key(bmap, *bh);
2173
2174         /* on-disk format */
2175         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2176         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2177
2178         return 0;
2179 }
2180
2181 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2182 {
2183         struct buffer_head *bh;
2184         struct nilfs_btree *btree;
2185         struct nilfs_btree_path *path;
2186         __u64 ptr;
2187         int ret;
2188
2189         btree = (struct nilfs_btree *)bmap;
2190         path = nilfs_btree_alloc_path();
2191         if (path == NULL)
2192                 return -ENOMEM;
2193         nilfs_btree_init_path(path);
2194
2195         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2196         if (ret < 0) {
2197                 WARN_ON(ret == -ENOENT);
2198                 goto out;
2199         }
2200         ret = nilfs_btree_get_block(btree, ptr, &bh);
2201         if (ret < 0) {
2202                 WARN_ON(ret == -ENOENT);
2203                 goto out;
2204         }
2205
2206         if (!buffer_dirty(bh))
2207                 nilfs_btnode_mark_dirty(bh);
2208         brelse(bh);
2209         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2210                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2211
2212  out:
2213         nilfs_btree_release_path(path);
2214         nilfs_btree_free_path(path);
2215         return ret;
2216 }
2217
2218 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2219         .bop_lookup             =       nilfs_btree_lookup,
2220         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2221         .bop_insert             =       nilfs_btree_insert,
2222         .bop_delete             =       nilfs_btree_delete,
2223         .bop_clear              =       NULL,
2224
2225         .bop_propagate          =       nilfs_btree_propagate,
2226
2227         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2228
2229         .bop_assign             =       nilfs_btree_assign,
2230         .bop_mark               =       nilfs_btree_mark,
2231
2232         .bop_last_key           =       nilfs_btree_last_key,
2233         .bop_check_insert       =       NULL,
2234         .bop_check_delete       =       nilfs_btree_check_delete,
2235         .bop_gather_data        =       nilfs_btree_gather_data,
2236 };
2237
2238 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2239         .bop_lookup             =       NULL,
2240         .bop_lookup_contig      =       NULL,
2241         .bop_insert             =       NULL,
2242         .bop_delete             =       NULL,
2243         .bop_clear              =       NULL,
2244
2245         .bop_propagate          =       nilfs_btree_propagate_gc,
2246
2247         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2248
2249         .bop_assign             =       nilfs_btree_assign_gc,
2250         .bop_mark               =       NULL,
2251
2252         .bop_last_key           =       NULL,
2253         .bop_check_insert       =       NULL,
2254         .bop_check_delete       =       NULL,
2255         .bop_gather_data        =       NULL,
2256 };
2257
2258 int nilfs_btree_init(struct nilfs_bmap *bmap)
2259 {
2260         bmap->b_ops = &nilfs_btree_ops;
2261         return 0;
2262 }
2263
2264 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2265 {
2266         bmap->b_ops = &nilfs_btree_ops_gc;
2267 }