trivial: fix typo "for for" in multiple files
[safe/jmp/linux-2.6] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_inode_item.h"
38 #include "xfs_btree.h"
39 #include "xfs_btree_trace.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_error.h"
42
43 /*
44  * Cursor allocation zone.
45  */
46 kmem_zone_t     *xfs_btree_cur_zone;
47
48 /*
49  * Btree magic numbers.
50  */
51 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
52         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
53 };
54
55
56 STATIC int                              /* error (0 or EFSCORRUPTED) */
57 xfs_btree_check_lblock(
58         struct xfs_btree_cur    *cur,   /* btree cursor */
59         struct xfs_btree_block  *block, /* btree long form block pointer */
60         int                     level,  /* level of the btree block */
61         struct xfs_buf          *bp)    /* buffer for block, if any */
62 {
63         int                     lblock_ok; /* block passes checks */
64         struct xfs_mount        *mp;    /* file system mount point */
65
66         mp = cur->bc_mp;
67         lblock_ok =
68                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
69                 be16_to_cpu(block->bb_level) == level &&
70                 be16_to_cpu(block->bb_numrecs) <=
71                         cur->bc_ops->get_maxrecs(cur, level) &&
72                 block->bb_u.l.bb_leftsib &&
73                 (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
74                  XFS_FSB_SANITY_CHECK(mp,
75                         be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
76                 block->bb_u.l.bb_rightsib &&
77                 (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
78                  XFS_FSB_SANITY_CHECK(mp,
79                         be64_to_cpu(block->bb_u.l.bb_rightsib)));
80         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
81                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
82                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
83                 if (bp)
84                         xfs_buftrace("LBTREE ERROR", bp);
85                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
86                                  mp);
87                 return XFS_ERROR(EFSCORRUPTED);
88         }
89         return 0;
90 }
91
92 STATIC int                              /* error (0 or EFSCORRUPTED) */
93 xfs_btree_check_sblock(
94         struct xfs_btree_cur    *cur,   /* btree cursor */
95         struct xfs_btree_block  *block, /* btree short form block pointer */
96         int                     level,  /* level of the btree block */
97         struct xfs_buf          *bp)    /* buffer containing block */
98 {
99         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
100         struct xfs_agf          *agf;   /* ag. freespace structure */
101         xfs_agblock_t           agflen; /* native ag. freespace length */
102         int                     sblock_ok; /* block passes checks */
103
104         agbp = cur->bc_private.a.agbp;
105         agf = XFS_BUF_TO_AGF(agbp);
106         agflen = be32_to_cpu(agf->agf_length);
107         sblock_ok =
108                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
109                 be16_to_cpu(block->bb_level) == level &&
110                 be16_to_cpu(block->bb_numrecs) <=
111                         cur->bc_ops->get_maxrecs(cur, level) &&
112                 (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
113                  be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
114                 block->bb_u.s.bb_leftsib &&
115                 (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
116                  be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
117                 block->bb_u.s.bb_rightsib;
118         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
119                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
120                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121                 if (bp)
122                         xfs_buftrace("SBTREE ERROR", bp);
123                 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124                         XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125                 return XFS_ERROR(EFSCORRUPTED);
126         }
127         return 0;
128 }
129
130 /*
131  * Debug routine: check that block header is ok.
132  */
133 int
134 xfs_btree_check_block(
135         struct xfs_btree_cur    *cur,   /* btree cursor */
136         struct xfs_btree_block  *block, /* generic btree block pointer */
137         int                     level,  /* level of the btree block */
138         struct xfs_buf          *bp)    /* buffer containing block, if any */
139 {
140         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
141                 return xfs_btree_check_lblock(cur, block, level, bp);
142         else
143                 return xfs_btree_check_sblock(cur, block, level, bp);
144 }
145
146 /*
147  * Check that (long) pointer is ok.
148  */
149 int                                     /* error (0 or EFSCORRUPTED) */
150 xfs_btree_check_lptr(
151         struct xfs_btree_cur    *cur,   /* btree cursor */
152         xfs_dfsbno_t            bno,    /* btree block disk address */
153         int                     level)  /* btree block level */
154 {
155         XFS_WANT_CORRUPTED_RETURN(
156                 level > 0 &&
157                 bno != NULLDFSBNO &&
158                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
159         return 0;
160 }
161
162 #ifdef DEBUG
163 /*
164  * Check that (short) pointer is ok.
165  */
166 STATIC int                              /* error (0 or EFSCORRUPTED) */
167 xfs_btree_check_sptr(
168         struct xfs_btree_cur    *cur,   /* btree cursor */
169         xfs_agblock_t           bno,    /* btree block disk address */
170         int                     level)  /* btree block level */
171 {
172         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
173
174         XFS_WANT_CORRUPTED_RETURN(
175                 level > 0 &&
176                 bno != NULLAGBLOCK &&
177                 bno != 0 &&
178                 bno < agblocks);
179         return 0;
180 }
181
182 /*
183  * Check that block ptr is ok.
184  */
185 STATIC int                              /* error (0 or EFSCORRUPTED) */
186 xfs_btree_check_ptr(
187         struct xfs_btree_cur    *cur,   /* btree cursor */
188         union xfs_btree_ptr     *ptr,   /* btree block disk address */
189         int                     index,  /* offset from ptr to check */
190         int                     level)  /* btree block level */
191 {
192         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
193                 return xfs_btree_check_lptr(cur,
194                                 be64_to_cpu((&ptr->l)[index]), level);
195         } else {
196                 return xfs_btree_check_sptr(cur,
197                                 be32_to_cpu((&ptr->s)[index]), level);
198         }
199 }
200 #endif
201
202 /*
203  * Delete the btree cursor.
204  */
205 void
206 xfs_btree_del_cursor(
207         xfs_btree_cur_t *cur,           /* btree cursor */
208         int             error)          /* del because of error */
209 {
210         int             i;              /* btree level */
211
212         /*
213          * Clear the buffer pointers, and release the buffers.
214          * If we're doing this in the face of an error, we
215          * need to make sure to inspect all of the entries
216          * in the bc_bufs array for buffers to be unlocked.
217          * This is because some of the btree code works from
218          * level n down to 0, and if we get an error along
219          * the way we won't have initialized all the entries
220          * down to 0.
221          */
222         for (i = 0; i < cur->bc_nlevels; i++) {
223                 if (cur->bc_bufs[i])
224                         xfs_btree_setbuf(cur, i, NULL);
225                 else if (!error)
226                         break;
227         }
228         /*
229          * Can't free a bmap cursor without having dealt with the
230          * allocated indirect blocks' accounting.
231          */
232         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
233                cur->bc_private.b.allocated == 0);
234         /*
235          * Free the cursor.
236          */
237         kmem_zone_free(xfs_btree_cur_zone, cur);
238 }
239
240 /*
241  * Duplicate the btree cursor.
242  * Allocate a new one, copy the record, re-get the buffers.
243  */
244 int                                     /* error */
245 xfs_btree_dup_cursor(
246         xfs_btree_cur_t *cur,           /* input cursor */
247         xfs_btree_cur_t **ncur)         /* output cursor */
248 {
249         xfs_buf_t       *bp;            /* btree block's buffer pointer */
250         int             error;          /* error return value */
251         int             i;              /* level number of btree block */
252         xfs_mount_t     *mp;            /* mount structure for filesystem */
253         xfs_btree_cur_t *new;           /* new cursor value */
254         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
255
256         tp = cur->bc_tp;
257         mp = cur->bc_mp;
258
259         /*
260          * Allocate a new cursor like the old one.
261          */
262         new = cur->bc_ops->dup_cursor(cur);
263
264         /*
265          * Copy the record currently in the cursor.
266          */
267         new->bc_rec = cur->bc_rec;
268
269         /*
270          * For each level current, re-get the buffer and copy the ptr value.
271          */
272         for (i = 0; i < new->bc_nlevels; i++) {
273                 new->bc_ptrs[i] = cur->bc_ptrs[i];
274                 new->bc_ra[i] = cur->bc_ra[i];
275                 if ((bp = cur->bc_bufs[i])) {
276                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
277                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
278                                 xfs_btree_del_cursor(new, error);
279                                 *ncur = NULL;
280                                 return error;
281                         }
282                         new->bc_bufs[i] = bp;
283                         ASSERT(bp);
284                         ASSERT(!XFS_BUF_GETERROR(bp));
285                 } else
286                         new->bc_bufs[i] = NULL;
287         }
288         *ncur = new;
289         return 0;
290 }
291
292 /*
293  * XFS btree block layout and addressing:
294  *
295  * There are two types of blocks in the btree: leaf and non-leaf blocks.
296  *
297  * The leaf record start with a header then followed by records containing
298  * the values.  A non-leaf block also starts with the same header, and
299  * then first contains lookup keys followed by an equal number of pointers
300  * to the btree blocks at the previous level.
301  *
302  *              +--------+-------+-------+-------+-------+-------+-------+
303  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
304  *              +--------+-------+-------+-------+-------+-------+-------+
305  *
306  *              +--------+-------+-------+-------+-------+-------+-------+
307  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
308  *              +--------+-------+-------+-------+-------+-------+-------+
309  *
310  * The header is called struct xfs_btree_block for reasons better left unknown
311  * and comes in different versions for short (32bit) and long (64bit) block
312  * pointers.  The record and key structures are defined by the btree instances
313  * and opaque to the btree core.  The block pointers are simple disk endian
314  * integers, available in a short (32bit) and long (64bit) variant.
315  *
316  * The helpers below calculate the offset of a given record, key or pointer
317  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
318  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
319  * inside the btree block is done using indices starting at one, not zero!
320  */
321
322 /*
323  * Return size of the btree block header for this btree instance.
324  */
325 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
326 {
327         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
328                 XFS_BTREE_LBLOCK_LEN :
329                 XFS_BTREE_SBLOCK_LEN;
330 }
331
332 /*
333  * Return size of btree block pointers for this btree instance.
334  */
335 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
336 {
337         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
338                 sizeof(__be64) : sizeof(__be32);
339 }
340
341 /*
342  * Calculate offset of the n-th record in a btree block.
343  */
344 STATIC size_t
345 xfs_btree_rec_offset(
346         struct xfs_btree_cur    *cur,
347         int                     n)
348 {
349         return xfs_btree_block_len(cur) +
350                 (n - 1) * cur->bc_ops->rec_len;
351 }
352
353 /*
354  * Calculate offset of the n-th key in a btree block.
355  */
356 STATIC size_t
357 xfs_btree_key_offset(
358         struct xfs_btree_cur    *cur,
359         int                     n)
360 {
361         return xfs_btree_block_len(cur) +
362                 (n - 1) * cur->bc_ops->key_len;
363 }
364
365 /*
366  * Calculate offset of the n-th block pointer in a btree block.
367  */
368 STATIC size_t
369 xfs_btree_ptr_offset(
370         struct xfs_btree_cur    *cur,
371         int                     n,
372         int                     level)
373 {
374         return xfs_btree_block_len(cur) +
375                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
376                 (n - 1) * xfs_btree_ptr_len(cur);
377 }
378
379 /*
380  * Return a pointer to the n-th record in the btree block.
381  */
382 STATIC union xfs_btree_rec *
383 xfs_btree_rec_addr(
384         struct xfs_btree_cur    *cur,
385         int                     n,
386         struct xfs_btree_block  *block)
387 {
388         return (union xfs_btree_rec *)
389                 ((char *)block + xfs_btree_rec_offset(cur, n));
390 }
391
392 /*
393  * Return a pointer to the n-th key in the btree block.
394  */
395 STATIC union xfs_btree_key *
396 xfs_btree_key_addr(
397         struct xfs_btree_cur    *cur,
398         int                     n,
399         struct xfs_btree_block  *block)
400 {
401         return (union xfs_btree_key *)
402                 ((char *)block + xfs_btree_key_offset(cur, n));
403 }
404
405 /*
406  * Return a pointer to the n-th block pointer in the btree block.
407  */
408 STATIC union xfs_btree_ptr *
409 xfs_btree_ptr_addr(
410         struct xfs_btree_cur    *cur,
411         int                     n,
412         struct xfs_btree_block  *block)
413 {
414         int                     level = xfs_btree_get_level(block);
415
416         ASSERT(block->bb_level != 0);
417
418         return (union xfs_btree_ptr *)
419                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
420 }
421
422 /*
423  * Get a the root block which is stored in the inode.
424  *
425  * For now this btree implementation assumes the btree root is always
426  * stored in the if_broot field of an inode fork.
427  */
428 STATIC struct xfs_btree_block *
429 xfs_btree_get_iroot(
430        struct xfs_btree_cur    *cur)
431 {
432        struct xfs_ifork        *ifp;
433
434        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
435        return (struct xfs_btree_block *)ifp->if_broot;
436 }
437
438 /*
439  * Retrieve the block pointer from the cursor at the given level.
440  * This may be an inode btree root or from a buffer.
441  */
442 STATIC struct xfs_btree_block *         /* generic btree block pointer */
443 xfs_btree_get_block(
444         struct xfs_btree_cur    *cur,   /* btree cursor */
445         int                     level,  /* level in btree */
446         struct xfs_buf          **bpp)  /* buffer containing the block */
447 {
448         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
449             (level == cur->bc_nlevels - 1)) {
450                 *bpp = NULL;
451                 return xfs_btree_get_iroot(cur);
452         }
453
454         *bpp = cur->bc_bufs[level];
455         return XFS_BUF_TO_BLOCK(*bpp);
456 }
457
458 /*
459  * Get a buffer for the block, return it with no data read.
460  * Long-form addressing.
461  */
462 xfs_buf_t *                             /* buffer for fsbno */
463 xfs_btree_get_bufl(
464         xfs_mount_t     *mp,            /* file system mount point */
465         xfs_trans_t     *tp,            /* transaction pointer */
466         xfs_fsblock_t   fsbno,          /* file system block number */
467         uint            lock)           /* lock flags for get_buf */
468 {
469         xfs_buf_t       *bp;            /* buffer pointer (return value) */
470         xfs_daddr_t             d;              /* real disk block address */
471
472         ASSERT(fsbno != NULLFSBLOCK);
473         d = XFS_FSB_TO_DADDR(mp, fsbno);
474         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
475         ASSERT(bp);
476         ASSERT(!XFS_BUF_GETERROR(bp));
477         return bp;
478 }
479
480 /*
481  * Get a buffer for the block, return it with no data read.
482  * Short-form addressing.
483  */
484 xfs_buf_t *                             /* buffer for agno/agbno */
485 xfs_btree_get_bufs(
486         xfs_mount_t     *mp,            /* file system mount point */
487         xfs_trans_t     *tp,            /* transaction pointer */
488         xfs_agnumber_t  agno,           /* allocation group number */
489         xfs_agblock_t   agbno,          /* allocation group block number */
490         uint            lock)           /* lock flags for get_buf */
491 {
492         xfs_buf_t       *bp;            /* buffer pointer (return value) */
493         xfs_daddr_t             d;              /* real disk block address */
494
495         ASSERT(agno != NULLAGNUMBER);
496         ASSERT(agbno != NULLAGBLOCK);
497         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
498         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
499         ASSERT(bp);
500         ASSERT(!XFS_BUF_GETERROR(bp));
501         return bp;
502 }
503
504 /*
505  * Check for the cursor referring to the last block at the given level.
506  */
507 int                                     /* 1=is last block, 0=not last block */
508 xfs_btree_islastblock(
509         xfs_btree_cur_t         *cur,   /* btree cursor */
510         int                     level)  /* level to check */
511 {
512         struct xfs_btree_block  *block; /* generic btree block pointer */
513         xfs_buf_t               *bp;    /* buffer containing block */
514
515         block = xfs_btree_get_block(cur, level, &bp);
516         xfs_btree_check_block(cur, block, level, bp);
517         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
518                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
519         else
520                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
521 }
522
523 /*
524  * Change the cursor to point to the first record at the given level.
525  * Other levels are unaffected.
526  */
527 STATIC int                              /* success=1, failure=0 */
528 xfs_btree_firstrec(
529         xfs_btree_cur_t         *cur,   /* btree cursor */
530         int                     level)  /* level to change */
531 {
532         struct xfs_btree_block  *block; /* generic btree block pointer */
533         xfs_buf_t               *bp;    /* buffer containing block */
534
535         /*
536          * Get the block pointer for this level.
537          */
538         block = xfs_btree_get_block(cur, level, &bp);
539         xfs_btree_check_block(cur, block, level, bp);
540         /*
541          * It's empty, there is no such record.
542          */
543         if (!block->bb_numrecs)
544                 return 0;
545         /*
546          * Set the ptr value to 1, that's the first record/key.
547          */
548         cur->bc_ptrs[level] = 1;
549         return 1;
550 }
551
552 /*
553  * Change the cursor to point to the last record in the current block
554  * at the given level.  Other levels are unaffected.
555  */
556 STATIC int                              /* success=1, failure=0 */
557 xfs_btree_lastrec(
558         xfs_btree_cur_t         *cur,   /* btree cursor */
559         int                     level)  /* level to change */
560 {
561         struct xfs_btree_block  *block; /* generic btree block pointer */
562         xfs_buf_t               *bp;    /* buffer containing block */
563
564         /*
565          * Get the block pointer for this level.
566          */
567         block = xfs_btree_get_block(cur, level, &bp);
568         xfs_btree_check_block(cur, block, level, bp);
569         /*
570          * It's empty, there is no such record.
571          */
572         if (!block->bb_numrecs)
573                 return 0;
574         /*
575          * Set the ptr value to numrecs, that's the last record/key.
576          */
577         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
578         return 1;
579 }
580
581 /*
582  * Compute first and last byte offsets for the fields given.
583  * Interprets the offsets table, which contains struct field offsets.
584  */
585 void
586 xfs_btree_offsets(
587         __int64_t       fields,         /* bitmask of fields */
588         const short     *offsets,       /* table of field offsets */
589         int             nbits,          /* number of bits to inspect */
590         int             *first,         /* output: first byte offset */
591         int             *last)          /* output: last byte offset */
592 {
593         int             i;              /* current bit number */
594         __int64_t       imask;          /* mask for current bit number */
595
596         ASSERT(fields != 0);
597         /*
598          * Find the lowest bit, so the first byte offset.
599          */
600         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
601                 if (imask & fields) {
602                         *first = offsets[i];
603                         break;
604                 }
605         }
606         /*
607          * Find the highest bit, so the last byte offset.
608          */
609         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
610                 if (imask & fields) {
611                         *last = offsets[i + 1] - 1;
612                         break;
613                 }
614         }
615 }
616
617 /*
618  * Get a buffer for the block, return it read in.
619  * Long-form addressing.
620  */
621 int                                     /* error */
622 xfs_btree_read_bufl(
623         xfs_mount_t     *mp,            /* file system mount point */
624         xfs_trans_t     *tp,            /* transaction pointer */
625         xfs_fsblock_t   fsbno,          /* file system block number */
626         uint            lock,           /* lock flags for read_buf */
627         xfs_buf_t       **bpp,          /* buffer for fsbno */
628         int             refval)         /* ref count value for buffer */
629 {
630         xfs_buf_t       *bp;            /* return value */
631         xfs_daddr_t             d;              /* real disk block address */
632         int             error;
633
634         ASSERT(fsbno != NULLFSBLOCK);
635         d = XFS_FSB_TO_DADDR(mp, fsbno);
636         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
637                         mp->m_bsize, lock, &bp))) {
638                 return error;
639         }
640         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
641         if (bp != NULL) {
642                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
643         }
644         *bpp = bp;
645         return 0;
646 }
647
648 /*
649  * Read-ahead the block, don't wait for it, don't return a buffer.
650  * Long-form addressing.
651  */
652 /* ARGSUSED */
653 void
654 xfs_btree_reada_bufl(
655         xfs_mount_t     *mp,            /* file system mount point */
656         xfs_fsblock_t   fsbno,          /* file system block number */
657         xfs_extlen_t    count)          /* count of filesystem blocks */
658 {
659         xfs_daddr_t             d;
660
661         ASSERT(fsbno != NULLFSBLOCK);
662         d = XFS_FSB_TO_DADDR(mp, fsbno);
663         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
664 }
665
666 /*
667  * Read-ahead the block, don't wait for it, don't return a buffer.
668  * Short-form addressing.
669  */
670 /* ARGSUSED */
671 void
672 xfs_btree_reada_bufs(
673         xfs_mount_t     *mp,            /* file system mount point */
674         xfs_agnumber_t  agno,           /* allocation group number */
675         xfs_agblock_t   agbno,          /* allocation group block number */
676         xfs_extlen_t    count)          /* count of filesystem blocks */
677 {
678         xfs_daddr_t             d;
679
680         ASSERT(agno != NULLAGNUMBER);
681         ASSERT(agbno != NULLAGBLOCK);
682         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
683         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
684 }
685
686 STATIC int
687 xfs_btree_readahead_lblock(
688         struct xfs_btree_cur    *cur,
689         int                     lr,
690         struct xfs_btree_block  *block)
691 {
692         int                     rval = 0;
693         xfs_dfsbno_t            left = be64_to_cpu(block->bb_u.l.bb_leftsib);
694         xfs_dfsbno_t            right = be64_to_cpu(block->bb_u.l.bb_rightsib);
695
696         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
697                 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
698                 rval++;
699         }
700
701         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
702                 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
703                 rval++;
704         }
705
706         return rval;
707 }
708
709 STATIC int
710 xfs_btree_readahead_sblock(
711         struct xfs_btree_cur    *cur,
712         int                     lr,
713         struct xfs_btree_block *block)
714 {
715         int                     rval = 0;
716         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
717         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
718
719
720         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
721                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
722                                      left, 1);
723                 rval++;
724         }
725
726         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
727                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
728                                      right, 1);
729                 rval++;
730         }
731
732         return rval;
733 }
734
735 /*
736  * Read-ahead btree blocks, at the given level.
737  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
738  */
739 STATIC int
740 xfs_btree_readahead(
741         struct xfs_btree_cur    *cur,           /* btree cursor */
742         int                     lev,            /* level in btree */
743         int                     lr)             /* left/right bits */
744 {
745         struct xfs_btree_block  *block;
746
747         /*
748          * No readahead needed if we are at the root level and the
749          * btree root is stored in the inode.
750          */
751         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
752             (lev == cur->bc_nlevels - 1))
753                 return 0;
754
755         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
756                 return 0;
757
758         cur->bc_ra[lev] |= lr;
759         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
760
761         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
762                 return xfs_btree_readahead_lblock(cur, lr, block);
763         return xfs_btree_readahead_sblock(cur, lr, block);
764 }
765
766 /*
767  * Set the buffer for level "lev" in the cursor to bp, releasing
768  * any previous buffer.
769  */
770 void
771 xfs_btree_setbuf(
772         xfs_btree_cur_t         *cur,   /* btree cursor */
773         int                     lev,    /* level in btree */
774         xfs_buf_t               *bp)    /* new buffer to set */
775 {
776         struct xfs_btree_block  *b;     /* btree block */
777         xfs_buf_t               *obp;   /* old buffer pointer */
778
779         obp = cur->bc_bufs[lev];
780         if (obp)
781                 xfs_trans_brelse(cur->bc_tp, obp);
782         cur->bc_bufs[lev] = bp;
783         cur->bc_ra[lev] = 0;
784         if (!bp)
785                 return;
786         b = XFS_BUF_TO_BLOCK(bp);
787         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
788                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
789                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
790                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
791                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
792         } else {
793                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
794                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
795                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
796                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
797         }
798 }
799
800 STATIC int
801 xfs_btree_ptr_is_null(
802         struct xfs_btree_cur    *cur,
803         union xfs_btree_ptr     *ptr)
804 {
805         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
806                 return be64_to_cpu(ptr->l) == NULLDFSBNO;
807         else
808                 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
809 }
810
811 STATIC void
812 xfs_btree_set_ptr_null(
813         struct xfs_btree_cur    *cur,
814         union xfs_btree_ptr     *ptr)
815 {
816         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
817                 ptr->l = cpu_to_be64(NULLDFSBNO);
818         else
819                 ptr->s = cpu_to_be32(NULLAGBLOCK);
820 }
821
822 /*
823  * Get/set/init sibling pointers
824  */
825 STATIC void
826 xfs_btree_get_sibling(
827         struct xfs_btree_cur    *cur,
828         struct xfs_btree_block  *block,
829         union xfs_btree_ptr     *ptr,
830         int                     lr)
831 {
832         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
833
834         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
835                 if (lr == XFS_BB_RIGHTSIB)
836                         ptr->l = block->bb_u.l.bb_rightsib;
837                 else
838                         ptr->l = block->bb_u.l.bb_leftsib;
839         } else {
840                 if (lr == XFS_BB_RIGHTSIB)
841                         ptr->s = block->bb_u.s.bb_rightsib;
842                 else
843                         ptr->s = block->bb_u.s.bb_leftsib;
844         }
845 }
846
847 STATIC void
848 xfs_btree_set_sibling(
849         struct xfs_btree_cur    *cur,
850         struct xfs_btree_block  *block,
851         union xfs_btree_ptr     *ptr,
852         int                     lr)
853 {
854         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
855
856         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
857                 if (lr == XFS_BB_RIGHTSIB)
858                         block->bb_u.l.bb_rightsib = ptr->l;
859                 else
860                         block->bb_u.l.bb_leftsib = ptr->l;
861         } else {
862                 if (lr == XFS_BB_RIGHTSIB)
863                         block->bb_u.s.bb_rightsib = ptr->s;
864                 else
865                         block->bb_u.s.bb_leftsib = ptr->s;
866         }
867 }
868
869 STATIC void
870 xfs_btree_init_block(
871         struct xfs_btree_cur    *cur,
872         int                     level,
873         int                     numrecs,
874         struct xfs_btree_block  *new)   /* new block */
875 {
876         new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
877         new->bb_level = cpu_to_be16(level);
878         new->bb_numrecs = cpu_to_be16(numrecs);
879
880         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
881                 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
882                 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
883         } else {
884                 new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
885                 new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
886         }
887 }
888
889 /*
890  * Return true if ptr is the last record in the btree and
891  * we need to track updateÑ• to this record.  The decision
892  * will be further refined in the update_lastrec method.
893  */
894 STATIC int
895 xfs_btree_is_lastrec(
896         struct xfs_btree_cur    *cur,
897         struct xfs_btree_block  *block,
898         int                     level)
899 {
900         union xfs_btree_ptr     ptr;
901
902         if (level > 0)
903                 return 0;
904         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
905                 return 0;
906
907         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
908         if (!xfs_btree_ptr_is_null(cur, &ptr))
909                 return 0;
910         return 1;
911 }
912
913 STATIC void
914 xfs_btree_buf_to_ptr(
915         struct xfs_btree_cur    *cur,
916         struct xfs_buf          *bp,
917         union xfs_btree_ptr     *ptr)
918 {
919         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
920                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
921                                         XFS_BUF_ADDR(bp)));
922         else {
923                 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
924                                         XFS_BUF_ADDR(bp)));
925         }
926 }
927
928 STATIC xfs_daddr_t
929 xfs_btree_ptr_to_daddr(
930         struct xfs_btree_cur    *cur,
931         union xfs_btree_ptr     *ptr)
932 {
933         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
934                 ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO);
935
936                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
937         } else {
938                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
939                 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
940
941                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
942                                         be32_to_cpu(ptr->s));
943         }
944 }
945
946 STATIC void
947 xfs_btree_set_refs(
948         struct xfs_btree_cur    *cur,
949         struct xfs_buf          *bp)
950 {
951         switch (cur->bc_btnum) {
952         case XFS_BTNUM_BNO:
953         case XFS_BTNUM_CNT:
954                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
955                 break;
956         case XFS_BTNUM_INO:
957                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
958                 break;
959         case XFS_BTNUM_BMAP:
960                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
961                 break;
962         default:
963                 ASSERT(0);
964         }
965 }
966
967 STATIC int
968 xfs_btree_get_buf_block(
969         struct xfs_btree_cur    *cur,
970         union xfs_btree_ptr     *ptr,
971         int                     flags,
972         struct xfs_btree_block  **block,
973         struct xfs_buf          **bpp)
974 {
975         struct xfs_mount        *mp = cur->bc_mp;
976         xfs_daddr_t             d;
977
978         /* need to sort out how callers deal with failures first */
979         ASSERT(!(flags & XFS_BUF_TRYLOCK));
980
981         d = xfs_btree_ptr_to_daddr(cur, ptr);
982         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
983                                  mp->m_bsize, flags);
984
985         ASSERT(*bpp);
986         ASSERT(!XFS_BUF_GETERROR(*bpp));
987
988         *block = XFS_BUF_TO_BLOCK(*bpp);
989         return 0;
990 }
991
992 /*
993  * Read in the buffer at the given ptr and return the buffer and
994  * the block pointer within the buffer.
995  */
996 STATIC int
997 xfs_btree_read_buf_block(
998         struct xfs_btree_cur    *cur,
999         union xfs_btree_ptr     *ptr,
1000         int                     level,
1001         int                     flags,
1002         struct xfs_btree_block  **block,
1003         struct xfs_buf          **bpp)
1004 {
1005         struct xfs_mount        *mp = cur->bc_mp;
1006         xfs_daddr_t             d;
1007         int                     error;
1008
1009         /* need to sort out how callers deal with failures first */
1010         ASSERT(!(flags & XFS_BUF_TRYLOCK));
1011
1012         d = xfs_btree_ptr_to_daddr(cur, ptr);
1013         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1014                                    mp->m_bsize, flags, bpp);
1015         if (error)
1016                 return error;
1017
1018         ASSERT(*bpp != NULL);
1019         ASSERT(!XFS_BUF_GETERROR(*bpp));
1020
1021         xfs_btree_set_refs(cur, *bpp);
1022         *block = XFS_BUF_TO_BLOCK(*bpp);
1023
1024         error = xfs_btree_check_block(cur, *block, level, *bpp);
1025         if (error)
1026                 xfs_trans_brelse(cur->bc_tp, *bpp);
1027         return error;
1028 }
1029
1030 /*
1031  * Copy keys from one btree block to another.
1032  */
1033 STATIC void
1034 xfs_btree_copy_keys(
1035         struct xfs_btree_cur    *cur,
1036         union xfs_btree_key     *dst_key,
1037         union xfs_btree_key     *src_key,
1038         int                     numkeys)
1039 {
1040         ASSERT(numkeys >= 0);
1041         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1042 }
1043
1044 /*
1045  * Copy records from one btree block to another.
1046  */
1047 STATIC void
1048 xfs_btree_copy_recs(
1049         struct xfs_btree_cur    *cur,
1050         union xfs_btree_rec     *dst_rec,
1051         union xfs_btree_rec     *src_rec,
1052         int                     numrecs)
1053 {
1054         ASSERT(numrecs >= 0);
1055         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1056 }
1057
1058 /*
1059  * Copy block pointers from one btree block to another.
1060  */
1061 STATIC void
1062 xfs_btree_copy_ptrs(
1063         struct xfs_btree_cur    *cur,
1064         union xfs_btree_ptr     *dst_ptr,
1065         union xfs_btree_ptr     *src_ptr,
1066         int                     numptrs)
1067 {
1068         ASSERT(numptrs >= 0);
1069         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1070 }
1071
1072 /*
1073  * Shift keys one index left/right inside a single btree block.
1074  */
1075 STATIC void
1076 xfs_btree_shift_keys(
1077         struct xfs_btree_cur    *cur,
1078         union xfs_btree_key     *key,
1079         int                     dir,
1080         int                     numkeys)
1081 {
1082         char                    *dst_key;
1083
1084         ASSERT(numkeys >= 0);
1085         ASSERT(dir == 1 || dir == -1);
1086
1087         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1088         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1089 }
1090
1091 /*
1092  * Shift records one index left/right inside a single btree block.
1093  */
1094 STATIC void
1095 xfs_btree_shift_recs(
1096         struct xfs_btree_cur    *cur,
1097         union xfs_btree_rec     *rec,
1098         int                     dir,
1099         int                     numrecs)
1100 {
1101         char                    *dst_rec;
1102
1103         ASSERT(numrecs >= 0);
1104         ASSERT(dir == 1 || dir == -1);
1105
1106         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1107         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1108 }
1109
1110 /*
1111  * Shift block pointers one index left/right inside a single btree block.
1112  */
1113 STATIC void
1114 xfs_btree_shift_ptrs(
1115         struct xfs_btree_cur    *cur,
1116         union xfs_btree_ptr     *ptr,
1117         int                     dir,
1118         int                     numptrs)
1119 {
1120         char                    *dst_ptr;
1121
1122         ASSERT(numptrs >= 0);
1123         ASSERT(dir == 1 || dir == -1);
1124
1125         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1126         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1127 }
1128
1129 /*
1130  * Log key values from the btree block.
1131  */
1132 STATIC void
1133 xfs_btree_log_keys(
1134         struct xfs_btree_cur    *cur,
1135         struct xfs_buf          *bp,
1136         int                     first,
1137         int                     last)
1138 {
1139         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1140         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1141
1142         if (bp) {
1143                 xfs_trans_log_buf(cur->bc_tp, bp,
1144                                   xfs_btree_key_offset(cur, first),
1145                                   xfs_btree_key_offset(cur, last + 1) - 1);
1146         } else {
1147                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1148                                 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1149         }
1150
1151         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1152 }
1153
1154 /*
1155  * Log record values from the btree block.
1156  */
1157 void
1158 xfs_btree_log_recs(
1159         struct xfs_btree_cur    *cur,
1160         struct xfs_buf          *bp,
1161         int                     first,
1162         int                     last)
1163 {
1164         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1165         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1166
1167         xfs_trans_log_buf(cur->bc_tp, bp,
1168                           xfs_btree_rec_offset(cur, first),
1169                           xfs_btree_rec_offset(cur, last + 1) - 1);
1170
1171         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1172 }
1173
1174 /*
1175  * Log block pointer fields from a btree block (nonleaf).
1176  */
1177 STATIC void
1178 xfs_btree_log_ptrs(
1179         struct xfs_btree_cur    *cur,   /* btree cursor */
1180         struct xfs_buf          *bp,    /* buffer containing btree block */
1181         int                     first,  /* index of first pointer to log */
1182         int                     last)   /* index of last pointer to log */
1183 {
1184         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1185         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1186
1187         if (bp) {
1188                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1189                 int                     level = xfs_btree_get_level(block);
1190
1191                 xfs_trans_log_buf(cur->bc_tp, bp,
1192                                 xfs_btree_ptr_offset(cur, first, level),
1193                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1194         } else {
1195                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1196                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1197         }
1198
1199         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1200 }
1201
1202 /*
1203  * Log fields from a btree block header.
1204  */
1205 void
1206 xfs_btree_log_block(
1207         struct xfs_btree_cur    *cur,   /* btree cursor */
1208         struct xfs_buf          *bp,    /* buffer containing btree block */
1209         int                     fields) /* mask of fields: XFS_BB_... */
1210 {
1211         int                     first;  /* first byte offset logged */
1212         int                     last;   /* last byte offset logged */
1213         static const short      soffsets[] = {  /* table of offsets (short) */
1214                 offsetof(struct xfs_btree_block, bb_magic),
1215                 offsetof(struct xfs_btree_block, bb_level),
1216                 offsetof(struct xfs_btree_block, bb_numrecs),
1217                 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1218                 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1219                 XFS_BTREE_SBLOCK_LEN
1220         };
1221         static const short      loffsets[] = {  /* table of offsets (long) */
1222                 offsetof(struct xfs_btree_block, bb_magic),
1223                 offsetof(struct xfs_btree_block, bb_level),
1224                 offsetof(struct xfs_btree_block, bb_numrecs),
1225                 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1226                 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1227                 XFS_BTREE_LBLOCK_LEN
1228         };
1229
1230         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1231         XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1232
1233         if (bp) {
1234                 xfs_btree_offsets(fields,
1235                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1236                                         loffsets : soffsets,
1237                                   XFS_BB_NUM_BITS, &first, &last);
1238                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1239         } else {
1240                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1241                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1242         }
1243
1244         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1245 }
1246
1247 /*
1248  * Increment cursor by one record at the level.
1249  * For nonzero levels the leaf-ward information is untouched.
1250  */
1251 int                                             /* error */
1252 xfs_btree_increment(
1253         struct xfs_btree_cur    *cur,
1254         int                     level,
1255         int                     *stat)          /* success/failure */
1256 {
1257         struct xfs_btree_block  *block;
1258         union xfs_btree_ptr     ptr;
1259         struct xfs_buf          *bp;
1260         int                     error;          /* error return value */
1261         int                     lev;
1262
1263         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1264         XFS_BTREE_TRACE_ARGI(cur, level);
1265
1266         ASSERT(level < cur->bc_nlevels);
1267
1268         /* Read-ahead to the right at this level. */
1269         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1270
1271         /* Get a pointer to the btree block. */
1272         block = xfs_btree_get_block(cur, level, &bp);
1273
1274 #ifdef DEBUG
1275         error = xfs_btree_check_block(cur, block, level, bp);
1276         if (error)
1277                 goto error0;
1278 #endif
1279
1280         /* We're done if we remain in the block after the increment. */
1281         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1282                 goto out1;
1283
1284         /* Fail if we just went off the right edge of the tree. */
1285         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1286         if (xfs_btree_ptr_is_null(cur, &ptr))
1287                 goto out0;
1288
1289         XFS_BTREE_STATS_INC(cur, increment);
1290
1291         /*
1292          * March up the tree incrementing pointers.
1293          * Stop when we don't go off the right edge of a block.
1294          */
1295         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1296                 block = xfs_btree_get_block(cur, lev, &bp);
1297
1298 #ifdef DEBUG
1299                 error = xfs_btree_check_block(cur, block, lev, bp);
1300                 if (error)
1301                         goto error0;
1302 #endif
1303
1304                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1305                         break;
1306
1307                 /* Read-ahead the right block for the next loop. */
1308                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1309         }
1310
1311         /*
1312          * If we went off the root then we are either seriously
1313          * confused or have the tree root in an inode.
1314          */
1315         if (lev == cur->bc_nlevels) {
1316                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1317                         goto out0;
1318                 ASSERT(0);
1319                 error = EFSCORRUPTED;
1320                 goto error0;
1321         }
1322         ASSERT(lev < cur->bc_nlevels);
1323
1324         /*
1325          * Now walk back down the tree, fixing up the cursor's buffer
1326          * pointers and key numbers.
1327          */
1328         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1329                 union xfs_btree_ptr     *ptrp;
1330
1331                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1332                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1333                                                         0, &block, &bp);
1334                 if (error)
1335                         goto error0;
1336
1337                 xfs_btree_setbuf(cur, lev, bp);
1338                 cur->bc_ptrs[lev] = 1;
1339         }
1340 out1:
1341         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1342         *stat = 1;
1343         return 0;
1344
1345 out0:
1346         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1347         *stat = 0;
1348         return 0;
1349
1350 error0:
1351         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1352         return error;
1353 }
1354
1355 /*
1356  * Decrement cursor by one record at the level.
1357  * For nonzero levels the leaf-ward information is untouched.
1358  */
1359 int                                             /* error */
1360 xfs_btree_decrement(
1361         struct xfs_btree_cur    *cur,
1362         int                     level,
1363         int                     *stat)          /* success/failure */
1364 {
1365         struct xfs_btree_block  *block;
1366         xfs_buf_t               *bp;
1367         int                     error;          /* error return value */
1368         int                     lev;
1369         union xfs_btree_ptr     ptr;
1370
1371         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1372         XFS_BTREE_TRACE_ARGI(cur, level);
1373
1374         ASSERT(level < cur->bc_nlevels);
1375
1376         /* Read-ahead to the left at this level. */
1377         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1378
1379         /* We're done if we remain in the block after the decrement. */
1380         if (--cur->bc_ptrs[level] > 0)
1381                 goto out1;
1382
1383         /* Get a pointer to the btree block. */
1384         block = xfs_btree_get_block(cur, level, &bp);
1385
1386 #ifdef DEBUG
1387         error = xfs_btree_check_block(cur, block, level, bp);
1388         if (error)
1389                 goto error0;
1390 #endif
1391
1392         /* Fail if we just went off the left edge of the tree. */
1393         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1394         if (xfs_btree_ptr_is_null(cur, &ptr))
1395                 goto out0;
1396
1397         XFS_BTREE_STATS_INC(cur, decrement);
1398
1399         /*
1400          * March up the tree decrementing pointers.
1401          * Stop when we don't go off the left edge of a block.
1402          */
1403         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1404                 if (--cur->bc_ptrs[lev] > 0)
1405                         break;
1406                 /* Read-ahead the left block for the next loop. */
1407                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1408         }
1409
1410         /*
1411          * If we went off the root then we are seriously confused.
1412          * or the root of the tree is in an inode.
1413          */
1414         if (lev == cur->bc_nlevels) {
1415                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1416                         goto out0;
1417                 ASSERT(0);
1418                 error = EFSCORRUPTED;
1419                 goto error0;
1420         }
1421         ASSERT(lev < cur->bc_nlevels);
1422
1423         /*
1424          * Now walk back down the tree, fixing up the cursor's buffer
1425          * pointers and key numbers.
1426          */
1427         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1428                 union xfs_btree_ptr     *ptrp;
1429
1430                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1431                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1432                                                         0, &block, &bp);
1433                 if (error)
1434                         goto error0;
1435                 xfs_btree_setbuf(cur, lev, bp);
1436                 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1437         }
1438 out1:
1439         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1440         *stat = 1;
1441         return 0;
1442
1443 out0:
1444         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1445         *stat = 0;
1446         return 0;
1447
1448 error0:
1449         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1450         return error;
1451 }
1452
1453 STATIC int
1454 xfs_btree_lookup_get_block(
1455         struct xfs_btree_cur    *cur,   /* btree cursor */
1456         int                     level,  /* level in the btree */
1457         union xfs_btree_ptr     *pp,    /* ptr to btree block */
1458         struct xfs_btree_block  **blkp) /* return btree block */
1459 {
1460         struct xfs_buf          *bp;    /* buffer pointer for btree block */
1461         int                     error = 0;
1462
1463         /* special case the root block if in an inode */
1464         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1465             (level == cur->bc_nlevels - 1)) {
1466                 *blkp = xfs_btree_get_iroot(cur);
1467                 return 0;
1468         }
1469
1470         /*
1471          * If the old buffer at this level for the disk address we are
1472          * looking for re-use it.
1473          *
1474          * Otherwise throw it away and get a new one.
1475          */
1476         bp = cur->bc_bufs[level];
1477         if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1478                 *blkp = XFS_BUF_TO_BLOCK(bp);
1479                 return 0;
1480         }
1481
1482         error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1483         if (error)
1484                 return error;
1485
1486         xfs_btree_setbuf(cur, level, bp);
1487         return 0;
1488 }
1489
1490 /*
1491  * Get current search key.  For level 0 we don't actually have a key
1492  * structure so we make one up from the record.  For all other levels
1493  * we just return the right key.
1494  */
1495 STATIC union xfs_btree_key *
1496 xfs_lookup_get_search_key(
1497         struct xfs_btree_cur    *cur,
1498         int                     level,
1499         int                     keyno,
1500         struct xfs_btree_block  *block,
1501         union xfs_btree_key     *kp)
1502 {
1503         if (level == 0) {
1504                 cur->bc_ops->init_key_from_rec(kp,
1505                                 xfs_btree_rec_addr(cur, keyno, block));
1506                 return kp;
1507         }
1508
1509         return xfs_btree_key_addr(cur, keyno, block);
1510 }
1511
1512 /*
1513  * Lookup the record.  The cursor is made to point to it, based on dir.
1514  * Return 0 if can't find any such record, 1 for success.
1515  */
1516 int                                     /* error */
1517 xfs_btree_lookup(
1518         struct xfs_btree_cur    *cur,   /* btree cursor */
1519         xfs_lookup_t            dir,    /* <=, ==, or >= */
1520         int                     *stat)  /* success/failure */
1521 {
1522         struct xfs_btree_block  *block; /* current btree block */
1523         __int64_t               diff;   /* difference for the current key */
1524         int                     error;  /* error return value */
1525         int                     keyno;  /* current key number */
1526         int                     level;  /* level in the btree */
1527         union xfs_btree_ptr     *pp;    /* ptr to btree block */
1528         union xfs_btree_ptr     ptr;    /* ptr to btree block */
1529
1530         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1531         XFS_BTREE_TRACE_ARGI(cur, dir);
1532
1533         XFS_BTREE_STATS_INC(cur, lookup);
1534
1535         block = NULL;
1536         keyno = 0;
1537
1538         /* initialise start pointer from cursor */
1539         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1540         pp = &ptr;
1541
1542         /*
1543          * Iterate over each level in the btree, starting at the root.
1544          * For each level above the leaves, find the key we need, based
1545          * on the lookup record, then follow the corresponding block
1546          * pointer down to the next level.
1547          */
1548         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1549                 /* Get the block we need to do the lookup on. */
1550                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1551                 if (error)
1552                         goto error0;
1553
1554                 if (diff == 0) {
1555                         /*
1556                          * If we already had a key match at a higher level, we
1557                          * know we need to use the first entry in this block.
1558                          */
1559                         keyno = 1;
1560                 } else {
1561                         /* Otherwise search this block. Do a binary search. */
1562
1563                         int     high;   /* high entry number */
1564                         int     low;    /* low entry number */
1565
1566                         /* Set low and high entry numbers, 1-based. */
1567                         low = 1;
1568                         high = xfs_btree_get_numrecs(block);
1569                         if (!high) {
1570                                 /* Block is empty, must be an empty leaf. */
1571                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1572
1573                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1574                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1575                                 *stat = 0;
1576                                 return 0;
1577                         }
1578
1579                         /* Binary search the block. */
1580                         while (low <= high) {
1581                                 union xfs_btree_key     key;
1582                                 union xfs_btree_key     *kp;
1583
1584                                 XFS_BTREE_STATS_INC(cur, compare);
1585
1586                                 /* keyno is average of low and high. */
1587                                 keyno = (low + high) >> 1;
1588
1589                                 /* Get current search key */
1590                                 kp = xfs_lookup_get_search_key(cur, level,
1591                                                 keyno, block, &key);
1592
1593                                 /*
1594                                  * Compute difference to get next direction:
1595                                  *  - less than, move right
1596                                  *  - greater than, move left
1597                                  *  - equal, we're done
1598                                  */
1599                                 diff = cur->bc_ops->key_diff(cur, kp);
1600                                 if (diff < 0)
1601                                         low = keyno + 1;
1602                                 else if (diff > 0)
1603                                         high = keyno - 1;
1604                                 else
1605                                         break;
1606                         }
1607                 }
1608
1609                 /*
1610                  * If there are more levels, set up for the next level
1611                  * by getting the block number and filling in the cursor.
1612                  */
1613                 if (level > 0) {
1614                         /*
1615                          * If we moved left, need the previous key number,
1616                          * unless there isn't one.
1617                          */
1618                         if (diff > 0 && --keyno < 1)
1619                                 keyno = 1;
1620                         pp = xfs_btree_ptr_addr(cur, keyno, block);
1621
1622 #ifdef DEBUG
1623                         error = xfs_btree_check_ptr(cur, pp, 0, level);
1624                         if (error)
1625                                 goto error0;
1626 #endif
1627                         cur->bc_ptrs[level] = keyno;
1628                 }
1629         }
1630
1631         /* Done with the search. See if we need to adjust the results. */
1632         if (dir != XFS_LOOKUP_LE && diff < 0) {
1633                 keyno++;
1634                 /*
1635                  * If ge search and we went off the end of the block, but it's
1636                  * not the last block, we're in the wrong block.
1637                  */
1638                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1639                 if (dir == XFS_LOOKUP_GE &&
1640                     keyno > xfs_btree_get_numrecs(block) &&
1641                     !xfs_btree_ptr_is_null(cur, &ptr)) {
1642                         int     i;
1643
1644                         cur->bc_ptrs[0] = keyno;
1645                         error = xfs_btree_increment(cur, 0, &i);
1646                         if (error)
1647                                 goto error0;
1648                         XFS_WANT_CORRUPTED_RETURN(i == 1);
1649                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1650                         *stat = 1;
1651                         return 0;
1652                 }
1653         } else if (dir == XFS_LOOKUP_LE && diff > 0)
1654                 keyno--;
1655         cur->bc_ptrs[0] = keyno;
1656
1657         /* Return if we succeeded or not. */
1658         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1659                 *stat = 0;
1660         else if (dir != XFS_LOOKUP_EQ || diff == 0)
1661                 *stat = 1;
1662         else
1663                 *stat = 0;
1664         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1665         return 0;
1666
1667 error0:
1668         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1669         return error;
1670 }
1671
1672 /*
1673  * Update keys at all levels from here to the root along the cursor's path.
1674  */
1675 STATIC int
1676 xfs_btree_updkey(
1677         struct xfs_btree_cur    *cur,
1678         union xfs_btree_key     *keyp,
1679         int                     level)
1680 {
1681         struct xfs_btree_block  *block;
1682         struct xfs_buf          *bp;
1683         union xfs_btree_key     *kp;
1684         int                     ptr;
1685
1686         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1687         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1688
1689         ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1690
1691         /*
1692          * Go up the tree from this level toward the root.
1693          * At each level, update the key value to the value input.
1694          * Stop when we reach a level where the cursor isn't pointing
1695          * at the first entry in the block.
1696          */
1697         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1698 #ifdef DEBUG
1699                 int             error;
1700 #endif
1701                 block = xfs_btree_get_block(cur, level, &bp);
1702 #ifdef DEBUG
1703                 error = xfs_btree_check_block(cur, block, level, bp);
1704                 if (error) {
1705                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1706                         return error;
1707                 }
1708 #endif
1709                 ptr = cur->bc_ptrs[level];
1710                 kp = xfs_btree_key_addr(cur, ptr, block);
1711                 xfs_btree_copy_keys(cur, kp, keyp, 1);
1712                 xfs_btree_log_keys(cur, bp, ptr, ptr);
1713         }
1714
1715         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1716         return 0;
1717 }
1718
1719 /*
1720  * Update the record referred to by cur to the value in the
1721  * given record. This either works (return 0) or gets an
1722  * EFSCORRUPTED error.
1723  */
1724 int
1725 xfs_btree_update(
1726         struct xfs_btree_cur    *cur,
1727         union xfs_btree_rec     *rec)
1728 {
1729         struct xfs_btree_block  *block;
1730         struct xfs_buf          *bp;
1731         int                     error;
1732         int                     ptr;
1733         union xfs_btree_rec     *rp;
1734
1735         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1736         XFS_BTREE_TRACE_ARGR(cur, rec);
1737
1738         /* Pick up the current block. */
1739         block = xfs_btree_get_block(cur, 0, &bp);
1740
1741 #ifdef DEBUG
1742         error = xfs_btree_check_block(cur, block, 0, bp);
1743         if (error)
1744                 goto error0;
1745 #endif
1746         /* Get the address of the rec to be updated. */
1747         ptr = cur->bc_ptrs[0];
1748         rp = xfs_btree_rec_addr(cur, ptr, block);
1749
1750         /* Fill in the new contents and log them. */
1751         xfs_btree_copy_recs(cur, rp, rec, 1);
1752         xfs_btree_log_recs(cur, bp, ptr, ptr);
1753
1754         /*
1755          * If we are tracking the last record in the tree and
1756          * we are at the far right edge of the tree, update it.
1757          */
1758         if (xfs_btree_is_lastrec(cur, block, 0)) {
1759                 cur->bc_ops->update_lastrec(cur, block, rec,
1760                                             ptr, LASTREC_UPDATE);
1761         }
1762
1763         /* Updating first rec in leaf. Pass new key value up to our parent. */
1764         if (ptr == 1) {
1765                 union xfs_btree_key     key;
1766
1767                 cur->bc_ops->init_key_from_rec(&key, rec);
1768                 error = xfs_btree_updkey(cur, &key, 1);
1769                 if (error)
1770                         goto error0;
1771         }
1772
1773         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1774         return 0;
1775
1776 error0:
1777         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1778         return error;
1779 }
1780
1781 /*
1782  * Move 1 record left from cur/level if possible.
1783  * Update cur to reflect the new path.
1784  */
1785 STATIC int                                      /* error */
1786 xfs_btree_lshift(
1787         struct xfs_btree_cur    *cur,
1788         int                     level,
1789         int                     *stat)          /* success/failure */
1790 {
1791         union xfs_btree_key     key;            /* btree key */
1792         struct xfs_buf          *lbp;           /* left buffer pointer */
1793         struct xfs_btree_block  *left;          /* left btree block */
1794         int                     lrecs;          /* left record count */
1795         struct xfs_buf          *rbp;           /* right buffer pointer */
1796         struct xfs_btree_block  *right;         /* right btree block */
1797         int                     rrecs;          /* right record count */
1798         union xfs_btree_ptr     lptr;           /* left btree pointer */
1799         union xfs_btree_key     *rkp = NULL;    /* right btree key */
1800         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
1801         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
1802         int                     error;          /* error return value */
1803
1804         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1805         XFS_BTREE_TRACE_ARGI(cur, level);
1806
1807         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1808             level == cur->bc_nlevels - 1)
1809                 goto out0;
1810
1811         /* Set up variables for this block as "right". */
1812         right = xfs_btree_get_block(cur, level, &rbp);
1813
1814 #ifdef DEBUG
1815         error = xfs_btree_check_block(cur, right, level, rbp);
1816         if (error)
1817                 goto error0;
1818 #endif
1819
1820         /* If we've got no left sibling then we can't shift an entry left. */
1821         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1822         if (xfs_btree_ptr_is_null(cur, &lptr))
1823                 goto out0;
1824
1825         /*
1826          * If the cursor entry is the one that would be moved, don't
1827          * do it... it's too complicated.
1828          */
1829         if (cur->bc_ptrs[level] <= 1)
1830                 goto out0;
1831
1832         /* Set up the left neighbor as "left". */
1833         error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
1834         if (error)
1835                 goto error0;
1836
1837         /* If it's full, it can't take another entry. */
1838         lrecs = xfs_btree_get_numrecs(left);
1839         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
1840                 goto out0;
1841
1842         rrecs = xfs_btree_get_numrecs(right);
1843
1844         /*
1845          * We add one entry to the left side and remove one for the right side.
1846          * Account for it here, the changes will be updated on disk and logged
1847          * later.
1848          */
1849         lrecs++;
1850         rrecs--;
1851
1852         XFS_BTREE_STATS_INC(cur, lshift);
1853         XFS_BTREE_STATS_ADD(cur, moves, 1);
1854
1855         /*
1856          * If non-leaf, copy a key and a ptr to the left block.
1857          * Log the changes to the left block.
1858          */
1859         if (level > 0) {
1860                 /* It's a non-leaf.  Move keys and pointers. */
1861                 union xfs_btree_key     *lkp;   /* left btree key */
1862                 union xfs_btree_ptr     *lpp;   /* left address pointer */
1863
1864                 lkp = xfs_btree_key_addr(cur, lrecs, left);
1865                 rkp = xfs_btree_key_addr(cur, 1, right);
1866
1867                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
1868                 rpp = xfs_btree_ptr_addr(cur, 1, right);
1869 #ifdef DEBUG
1870                 error = xfs_btree_check_ptr(cur, rpp, 0, level);
1871                 if (error)
1872                         goto error0;
1873 #endif
1874                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
1875                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
1876
1877                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
1878                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
1879
1880                 ASSERT(cur->bc_ops->keys_inorder(cur,
1881                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
1882         } else {
1883                 /* It's a leaf.  Move records.  */
1884                 union xfs_btree_rec     *lrp;   /* left record pointer */
1885
1886                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
1887                 rrp = xfs_btree_rec_addr(cur, 1, right);
1888
1889                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
1890                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
1891
1892                 ASSERT(cur->bc_ops->recs_inorder(cur,
1893                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
1894         }
1895
1896         xfs_btree_set_numrecs(left, lrecs);
1897         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
1898
1899         xfs_btree_set_numrecs(right, rrecs);
1900         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
1901
1902         /*
1903          * Slide the contents of right down one entry.
1904          */
1905         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
1906         if (level > 0) {
1907                 /* It's a nonleaf. operate on keys and ptrs */
1908 #ifdef DEBUG
1909                 int                     i;              /* loop index */
1910
1911                 for (i = 0; i < rrecs; i++) {
1912                         error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
1913                         if (error)
1914                                 goto error0;
1915                 }
1916 #endif
1917                 xfs_btree_shift_keys(cur,
1918                                 xfs_btree_key_addr(cur, 2, right),
1919                                 -1, rrecs);
1920                 xfs_btree_shift_ptrs(cur,
1921                                 xfs_btree_ptr_addr(cur, 2, right),
1922                                 -1, rrecs);
1923
1924                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
1925                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
1926         } else {
1927                 /* It's a leaf. operate on records */
1928                 xfs_btree_shift_recs(cur,
1929                         xfs_btree_rec_addr(cur, 2, right),
1930                         -1, rrecs);
1931                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
1932
1933                 /*
1934                  * If it's the first record in the block, we'll need a key
1935                  * structure to pass up to the next level (updkey).
1936                  */
1937                 cur->bc_ops->init_key_from_rec(&key,
1938                         xfs_btree_rec_addr(cur, 1, right));
1939                 rkp = &key;
1940         }
1941
1942         /* Update the parent key values of right. */
1943         error = xfs_btree_updkey(cur, rkp, level + 1);
1944         if (error)
1945                 goto error0;
1946
1947         /* Slide the cursor value left one. */
1948         cur->bc_ptrs[level]--;
1949
1950         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1951         *stat = 1;
1952         return 0;
1953
1954 out0:
1955         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1956         *stat = 0;
1957         return 0;
1958
1959 error0:
1960         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1961         return error;
1962 }
1963
1964 /*
1965  * Move 1 record right from cur/level if possible.
1966  * Update cur to reflect the new path.
1967  */
1968 STATIC int                                      /* error */
1969 xfs_btree_rshift(
1970         struct xfs_btree_cur    *cur,
1971         int                     level,
1972         int                     *stat)          /* success/failure */
1973 {
1974         union xfs_btree_key     key;            /* btree key */
1975         struct xfs_buf          *lbp;           /* left buffer pointer */
1976         struct xfs_btree_block  *left;          /* left btree block */
1977         struct xfs_buf          *rbp;           /* right buffer pointer */
1978         struct xfs_btree_block  *right;         /* right btree block */
1979         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
1980         union xfs_btree_ptr     rptr;           /* right block pointer */
1981         union xfs_btree_key     *rkp;           /* right btree key */
1982         int                     rrecs;          /* right record count */
1983         int                     lrecs;          /* left record count */
1984         int                     error;          /* error return value */
1985         int                     i;              /* loop counter */
1986
1987         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1988         XFS_BTREE_TRACE_ARGI(cur, level);
1989
1990         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1991             (level == cur->bc_nlevels - 1))
1992                 goto out0;
1993
1994         /* Set up variables for this block as "left". */
1995         left = xfs_btree_get_block(cur, level, &lbp);
1996
1997 #ifdef DEBUG
1998         error = xfs_btree_check_block(cur, left, level, lbp);
1999         if (error)
2000                 goto error0;
2001 #endif
2002
2003         /* If we've got no right sibling then we can't shift an entry right. */
2004         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2005         if (xfs_btree_ptr_is_null(cur, &rptr))
2006                 goto out0;
2007
2008         /*
2009          * If the cursor entry is the one that would be moved, don't
2010          * do it... it's too complicated.
2011          */
2012         lrecs = xfs_btree_get_numrecs(left);
2013         if (cur->bc_ptrs[level] >= lrecs)
2014                 goto out0;
2015
2016         /* Set up the right neighbor as "right". */
2017         error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2018         if (error)
2019                 goto error0;
2020
2021         /* If it's full, it can't take another entry. */
2022         rrecs = xfs_btree_get_numrecs(right);
2023         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2024                 goto out0;
2025
2026         XFS_BTREE_STATS_INC(cur, rshift);
2027         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2028
2029         /*
2030          * Make a hole at the start of the right neighbor block, then
2031          * copy the last left block entry to the hole.
2032          */
2033         if (level > 0) {
2034                 /* It's a nonleaf. make a hole in the keys and ptrs */
2035                 union xfs_btree_key     *lkp;
2036                 union xfs_btree_ptr     *lpp;
2037                 union xfs_btree_ptr     *rpp;
2038
2039                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2040                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2041                 rkp = xfs_btree_key_addr(cur, 1, right);
2042                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2043
2044 #ifdef DEBUG
2045                 for (i = rrecs - 1; i >= 0; i--) {
2046                         error = xfs_btree_check_ptr(cur, rpp, i, level);
2047                         if (error)
2048                                 goto error0;
2049                 }
2050 #endif
2051
2052                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2053                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2054
2055 #ifdef DEBUG
2056                 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2057                 if (error)
2058                         goto error0;
2059 #endif
2060
2061                 /* Now put the new data in, and log it. */
2062                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2063                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2064
2065                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2066                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2067
2068                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2069                         xfs_btree_key_addr(cur, 2, right)));
2070         } else {
2071                 /* It's a leaf. make a hole in the records */
2072                 union xfs_btree_rec     *lrp;
2073                 union xfs_btree_rec     *rrp;
2074
2075                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2076                 rrp = xfs_btree_rec_addr(cur, 1, right);
2077
2078                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2079
2080                 /* Now put the new data in, and log it. */
2081                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2082                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2083
2084                 cur->bc_ops->init_key_from_rec(&key, rrp);
2085                 rkp = &key;
2086
2087                 ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2088                         xfs_btree_rec_addr(cur, 2, right)));
2089         }
2090
2091         /*
2092          * Decrement and log left's numrecs, bump and log right's numrecs.
2093          */
2094         xfs_btree_set_numrecs(left, --lrecs);
2095         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2096
2097         xfs_btree_set_numrecs(right, ++rrecs);
2098         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2099
2100         /*
2101          * Using a temporary cursor, update the parent key values of the
2102          * block on the right.
2103          */
2104         error = xfs_btree_dup_cursor(cur, &tcur);
2105         if (error)
2106                 goto error0;
2107         i = xfs_btree_lastrec(tcur, level);
2108         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2109
2110         error = xfs_btree_increment(tcur, level, &i);
2111         if (error)
2112                 goto error1;
2113
2114         error = xfs_btree_updkey(tcur, rkp, level + 1);
2115         if (error)
2116                 goto error1;
2117
2118         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2119
2120         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2121         *stat = 1;
2122         return 0;
2123
2124 out0:
2125         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2126         *stat = 0;
2127         return 0;
2128
2129 error0:
2130         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2131         return error;
2132
2133 error1:
2134         XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2135         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2136         return error;
2137 }
2138
2139 /*
2140  * Split cur/level block in half.
2141  * Return new block number and the key to its first
2142  * record (to be inserted into parent).
2143  */
2144 STATIC int                                      /* error */
2145 xfs_btree_split(
2146         struct xfs_btree_cur    *cur,
2147         int                     level,
2148         union xfs_btree_ptr     *ptrp,
2149         union xfs_btree_key     *key,
2150         struct xfs_btree_cur    **curp,
2151         int                     *stat)          /* success/failure */
2152 {
2153         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2154         struct xfs_buf          *lbp;           /* left buffer pointer */
2155         struct xfs_btree_block  *left;          /* left btree block */
2156         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2157         struct xfs_buf          *rbp;           /* right buffer pointer */
2158         struct xfs_btree_block  *right;         /* right btree block */
2159         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2160         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2161         struct xfs_btree_block  *rrblock;       /* right-right btree block */
2162         int                     lrecs;
2163         int                     rrecs;
2164         int                     src_index;
2165         int                     error;          /* error return value */
2166 #ifdef DEBUG
2167         int                     i;
2168 #endif
2169
2170         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2171         XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2172
2173         XFS_BTREE_STATS_INC(cur, split);
2174
2175         /* Set up left block (current one). */
2176         left = xfs_btree_get_block(cur, level, &lbp);
2177
2178 #ifdef DEBUG
2179         error = xfs_btree_check_block(cur, left, level, lbp);
2180         if (error)
2181                 goto error0;
2182 #endif
2183
2184         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2185
2186         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2187         error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2188         if (error)
2189                 goto error0;
2190         if (*stat == 0)
2191                 goto out0;
2192         XFS_BTREE_STATS_INC(cur, alloc);
2193
2194         /* Set up the new block as "right". */
2195         error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2196         if (error)
2197                 goto error0;
2198
2199         /* Fill in the btree header for the new right block. */
2200         xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
2201
2202         /*
2203          * Split the entries between the old and the new block evenly.
2204          * Make sure that if there's an odd number of entries now, that
2205          * each new block will have the same number of entries.
2206          */
2207         lrecs = xfs_btree_get_numrecs(left);
2208         rrecs = lrecs / 2;
2209         if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2210                 rrecs++;
2211         src_index = (lrecs - rrecs + 1);
2212
2213         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2214
2215         /*
2216          * Copy btree block entries from the left block over to the
2217          * new block, the right. Update the right block and log the
2218          * changes.
2219          */
2220         if (level > 0) {
2221                 /* It's a non-leaf.  Move keys and pointers. */
2222                 union xfs_btree_key     *lkp;   /* left btree key */
2223                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2224                 union xfs_btree_key     *rkp;   /* right btree key */
2225                 union xfs_btree_ptr     *rpp;   /* right address pointer */
2226
2227                 lkp = xfs_btree_key_addr(cur, src_index, left);
2228                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2229                 rkp = xfs_btree_key_addr(cur, 1, right);
2230                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2231
2232 #ifdef DEBUG
2233                 for (i = src_index; i < rrecs; i++) {
2234                         error = xfs_btree_check_ptr(cur, lpp, i, level);
2235                         if (error)
2236                                 goto error0;
2237                 }
2238 #endif
2239
2240                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2241                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2242
2243                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2244                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2245
2246                 /* Grab the keys to the entries moved to the right block */
2247                 xfs_btree_copy_keys(cur, key, rkp, 1);
2248         } else {
2249                 /* It's a leaf.  Move records.  */
2250                 union xfs_btree_rec     *lrp;   /* left record pointer */
2251                 union xfs_btree_rec     *rrp;   /* right record pointer */
2252
2253                 lrp = xfs_btree_rec_addr(cur, src_index, left);
2254                 rrp = xfs_btree_rec_addr(cur, 1, right);
2255
2256                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2257                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2258
2259                 cur->bc_ops->init_key_from_rec(key,
2260                         xfs_btree_rec_addr(cur, 1, right));
2261         }
2262
2263
2264         /*
2265          * Find the left block number by looking in the buffer.
2266          * Adjust numrecs, sibling pointers.
2267          */
2268         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2269         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2270         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2271         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2272
2273         lrecs -= rrecs;
2274         xfs_btree_set_numrecs(left, lrecs);
2275         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2276
2277         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2278         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2279
2280         /*
2281          * If there's a block to the new block's right, make that block
2282          * point back to right instead of to left.
2283          */
2284         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2285                 error = xfs_btree_read_buf_block(cur, &rrptr, level,
2286                                                         0, &rrblock, &rrbp);
2287                 if (error)
2288                         goto error0;
2289                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2290                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2291         }
2292         /*
2293          * If the cursor is really in the right block, move it there.
2294          * If it's just pointing past the last entry in left, then we'll
2295          * insert there, so don't change anything in that case.
2296          */
2297         if (cur->bc_ptrs[level] > lrecs + 1) {
2298                 xfs_btree_setbuf(cur, level, rbp);
2299                 cur->bc_ptrs[level] -= lrecs;
2300         }
2301         /*
2302          * If there are more levels, we'll need another cursor which refers
2303          * the right block, no matter where this cursor was.
2304          */
2305         if (level + 1 < cur->bc_nlevels) {
2306                 error = xfs_btree_dup_cursor(cur, curp);
2307                 if (error)
2308                         goto error0;
2309                 (*curp)->bc_ptrs[level + 1]++;
2310         }
2311         *ptrp = rptr;
2312         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2313         *stat = 1;
2314         return 0;
2315 out0:
2316         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2317         *stat = 0;
2318         return 0;
2319
2320 error0:
2321         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2322         return error;
2323 }
2324
2325 /*
2326  * Copy the old inode root contents into a real block and make the
2327  * broot point to it.
2328  */
2329 int                                             /* error */
2330 xfs_btree_new_iroot(
2331         struct xfs_btree_cur    *cur,           /* btree cursor */
2332         int                     *logflags,      /* logging flags for inode */
2333         int                     *stat)          /* return status - 0 fail */
2334 {
2335         struct xfs_buf          *cbp;           /* buffer for cblock */
2336         struct xfs_btree_block  *block;         /* btree block */
2337         struct xfs_btree_block  *cblock;        /* child btree block */
2338         union xfs_btree_key     *ckp;           /* child key pointer */
2339         union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2340         union xfs_btree_key     *kp;            /* pointer to btree key */
2341         union xfs_btree_ptr     *pp;            /* pointer to block addr */
2342         union xfs_btree_ptr     nptr;           /* new block addr */
2343         int                     level;          /* btree level */
2344         int                     error;          /* error return code */
2345 #ifdef DEBUG
2346         int                     i;              /* loop counter */
2347 #endif
2348
2349         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2350         XFS_BTREE_STATS_INC(cur, newroot);
2351
2352         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2353
2354         level = cur->bc_nlevels - 1;
2355
2356         block = xfs_btree_get_iroot(cur);
2357         pp = xfs_btree_ptr_addr(cur, 1, block);
2358
2359         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2360         error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2361         if (error)
2362                 goto error0;
2363         if (*stat == 0) {
2364                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2365                 return 0;
2366         }
2367         XFS_BTREE_STATS_INC(cur, alloc);
2368
2369         /* Copy the root into a real block. */
2370         error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2371         if (error)
2372                 goto error0;
2373
2374         memcpy(cblock, block, xfs_btree_block_len(cur));
2375
2376         be16_add_cpu(&block->bb_level, 1);
2377         xfs_btree_set_numrecs(block, 1);
2378         cur->bc_nlevels++;
2379         cur->bc_ptrs[level + 1] = 1;
2380
2381         kp = xfs_btree_key_addr(cur, 1, block);
2382         ckp = xfs_btree_key_addr(cur, 1, cblock);
2383         xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2384
2385         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2386 #ifdef DEBUG
2387         for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2388                 error = xfs_btree_check_ptr(cur, pp, i, level);
2389                 if (error)
2390                         goto error0;
2391         }
2392 #endif
2393         xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2394
2395 #ifdef DEBUG
2396         error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2397         if (error)
2398                 goto error0;
2399 #endif
2400         xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2401
2402         xfs_iroot_realloc(cur->bc_private.b.ip,
2403                           1 - xfs_btree_get_numrecs(cblock),
2404                           cur->bc_private.b.whichfork);
2405
2406         xfs_btree_setbuf(cur, level, cbp);
2407
2408         /*
2409          * Do all this logging at the end so that
2410          * the root is at the right level.
2411          */
2412         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2413         xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2414         xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2415
2416         *logflags |=
2417                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
2418         *stat = 1;
2419         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2420         return 0;
2421 error0:
2422         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2423         return error;
2424 }
2425
2426 /*
2427  * Allocate a new root block, fill it in.
2428  */
2429 STATIC int                              /* error */
2430 xfs_btree_new_root(
2431         struct xfs_btree_cur    *cur,   /* btree cursor */
2432         int                     *stat)  /* success/failure */
2433 {
2434         struct xfs_btree_block  *block; /* one half of the old root block */
2435         struct xfs_buf          *bp;    /* buffer containing block */
2436         int                     error;  /* error return value */
2437         struct xfs_buf          *lbp;   /* left buffer pointer */
2438         struct xfs_btree_block  *left;  /* left btree block */
2439         struct xfs_buf          *nbp;   /* new (root) buffer */
2440         struct xfs_btree_block  *new;   /* new (root) btree block */
2441         int                     nptr;   /* new value for key index, 1 or 2 */
2442         struct xfs_buf          *rbp;   /* right buffer pointer */
2443         struct xfs_btree_block  *right; /* right btree block */
2444         union xfs_btree_ptr     rptr;
2445         union xfs_btree_ptr     lptr;
2446
2447         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2448         XFS_BTREE_STATS_INC(cur, newroot);
2449
2450         /* initialise our start point from the cursor */
2451         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2452
2453         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2454         error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2455         if (error)
2456                 goto error0;
2457         if (*stat == 0)
2458                 goto out0;
2459         XFS_BTREE_STATS_INC(cur, alloc);
2460
2461         /* Set up the new block. */
2462         error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2463         if (error)
2464                 goto error0;
2465
2466         /* Set the root in the holding structure  increasing the level by 1. */
2467         cur->bc_ops->set_root(cur, &lptr, 1);
2468
2469         /*
2470          * At the previous root level there are now two blocks: the old root,
2471          * and the new block generated when it was split.  We don't know which
2472          * one the cursor is pointing at, so we set up variables "left" and
2473          * "right" for each case.
2474          */
2475         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2476
2477 #ifdef DEBUG
2478         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2479         if (error)
2480                 goto error0;
2481 #endif
2482
2483         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2484         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2485                 /* Our block is left, pick up the right block. */
2486                 lbp = bp;
2487                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2488                 left = block;
2489                 error = xfs_btree_read_buf_block(cur, &rptr,
2490                                         cur->bc_nlevels - 1, 0, &right, &rbp);
2491                 if (error)
2492                         goto error0;
2493                 bp = rbp;
2494                 nptr = 1;
2495         } else {
2496                 /* Our block is right, pick up the left block. */
2497                 rbp = bp;
2498                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2499                 right = block;
2500                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2501                 error = xfs_btree_read_buf_block(cur, &lptr,
2502                                         cur->bc_nlevels - 1, 0, &left, &lbp);
2503                 if (error)
2504                         goto error0;
2505                 bp = lbp;
2506                 nptr = 2;
2507         }
2508         /* Fill in the new block's btree header and log it. */
2509         xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
2510         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2511         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2512                         !xfs_btree_ptr_is_null(cur, &rptr));
2513
2514         /* Fill in the key data in the new root. */
2515         if (xfs_btree_get_level(left) > 0) {
2516                 xfs_btree_copy_keys(cur,
2517                                 xfs_btree_key_addr(cur, 1, new),
2518                                 xfs_btree_key_addr(cur, 1, left), 1);
2519                 xfs_btree_copy_keys(cur,
2520                                 xfs_btree_key_addr(cur, 2, new),
2521                                 xfs_btree_key_addr(cur, 1, right), 1);
2522         } else {
2523                 cur->bc_ops->init_key_from_rec(
2524                                 xfs_btree_key_addr(cur, 1, new),
2525                                 xfs_btree_rec_addr(cur, 1, left));
2526                 cur->bc_ops->init_key_from_rec(
2527                                 xfs_btree_key_addr(cur, 2, new),
2528                                 xfs_btree_rec_addr(cur, 1, right));
2529         }
2530         xfs_btree_log_keys(cur, nbp, 1, 2);
2531
2532         /* Fill in the pointer data in the new root. */
2533         xfs_btree_copy_ptrs(cur,
2534                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2535         xfs_btree_copy_ptrs(cur,
2536                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2537         xfs_btree_log_ptrs(cur, nbp, 1, 2);
2538
2539         /* Fix up the cursor. */
2540         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2541         cur->bc_ptrs[cur->bc_nlevels] = nptr;
2542         cur->bc_nlevels++;
2543         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2544         *stat = 1;
2545         return 0;
2546 error0:
2547         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2548         return error;
2549 out0:
2550         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2551         *stat = 0;
2552         return 0;
2553 }
2554
2555 STATIC int
2556 xfs_btree_make_block_unfull(
2557         struct xfs_btree_cur    *cur,   /* btree cursor */
2558         int                     level,  /* btree level */
2559         int                     numrecs,/* # of recs in block */
2560         int                     *oindex,/* old tree index */
2561         int                     *index, /* new tree index */
2562         union xfs_btree_ptr     *nptr,  /* new btree ptr */
2563         struct xfs_btree_cur    **ncur, /* new btree cursor */
2564         union xfs_btree_rec     *nrec,  /* new record */
2565         int                     *stat)
2566 {
2567         union xfs_btree_key     key;    /* new btree key value */
2568         int                     error = 0;
2569
2570         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2571             level == cur->bc_nlevels - 1) {
2572                 struct xfs_inode *ip = cur->bc_private.b.ip;
2573
2574                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2575                         /* A root block that can be made bigger. */
2576
2577                         xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2578                 } else {
2579                         /* A root block that needs replacing */
2580                         int     logflags = 0;
2581
2582                         error = xfs_btree_new_iroot(cur, &logflags, stat);
2583                         if (error || *stat == 0)
2584                                 return error;
2585
2586                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2587                 }
2588
2589                 return 0;
2590         }
2591
2592         /* First, try shifting an entry to the right neighbor. */
2593         error = xfs_btree_rshift(cur, level, stat);
2594         if (error || *stat)
2595                 return error;
2596
2597         /* Next, try shifting an entry to the left neighbor. */
2598         error = xfs_btree_lshift(cur, level, stat);
2599         if (error)
2600                 return error;
2601
2602         if (*stat) {
2603                 *oindex = *index = cur->bc_ptrs[level];
2604                 return 0;
2605         }
2606
2607         /*
2608          * Next, try splitting the current block in half.
2609          *
2610          * If this works we have to re-set our variables because we
2611          * could be in a different block now.
2612          */
2613         error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2614         if (error || *stat == 0)
2615                 return error;
2616
2617
2618         *index = cur->bc_ptrs[level];
2619         cur->bc_ops->init_rec_from_key(&key, nrec);
2620         return 0;
2621 }
2622
2623 /*
2624  * Insert one record/level.  Return information to the caller
2625  * allowing the next level up to proceed if necessary.
2626  */
2627 STATIC int
2628 xfs_btree_insrec(
2629         struct xfs_btree_cur    *cur,   /* btree cursor */
2630         int                     level,  /* level to insert record at */
2631         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
2632         union xfs_btree_rec     *recp,  /* i/o: record data inserted */
2633         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
2634         int                     *stat)  /* success/failure */
2635 {
2636         struct xfs_btree_block  *block; /* btree block */
2637         struct xfs_buf          *bp;    /* buffer for block */
2638         union xfs_btree_key     key;    /* btree key */
2639         union xfs_btree_ptr     nptr;   /* new block ptr */
2640         struct xfs_btree_cur    *ncur;  /* new btree cursor */
2641         union xfs_btree_rec     nrec;   /* new record count */
2642         int                     optr;   /* old key/record index */
2643         int                     ptr;    /* key/record index */
2644         int                     numrecs;/* number of records */
2645         int                     error;  /* error return value */
2646 #ifdef DEBUG
2647         int                     i;
2648 #endif
2649
2650         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2651         XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2652
2653         ncur = NULL;
2654
2655         /*
2656          * If we have an external root pointer, and we've made it to the
2657          * root level, allocate a new root block and we're done.
2658          */
2659         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2660             (level >= cur->bc_nlevels)) {
2661                 error = xfs_btree_new_root(cur, stat);
2662                 xfs_btree_set_ptr_null(cur, ptrp);
2663
2664                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2665                 return error;
2666         }
2667
2668         /* If we're off the left edge, return failure. */
2669         ptr = cur->bc_ptrs[level];
2670         if (ptr == 0) {
2671                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2672                 *stat = 0;
2673                 return 0;
2674         }
2675
2676         /* Make a key out of the record data to be inserted, and save it. */
2677         cur->bc_ops->init_key_from_rec(&key, recp);
2678
2679         optr = ptr;
2680
2681         XFS_BTREE_STATS_INC(cur, insrec);
2682
2683         /* Get pointers to the btree buffer and block. */
2684         block = xfs_btree_get_block(cur, level, &bp);
2685         numrecs = xfs_btree_get_numrecs(block);
2686
2687 #ifdef DEBUG
2688         error = xfs_btree_check_block(cur, block, level, bp);
2689         if (error)
2690                 goto error0;
2691
2692         /* Check that the new entry is being inserted in the right place. */
2693         if (ptr <= numrecs) {
2694                 if (level == 0) {
2695                         ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2696                                 xfs_btree_rec_addr(cur, ptr, block)));
2697                 } else {
2698                         ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2699                                 xfs_btree_key_addr(cur, ptr, block)));
2700                 }
2701         }
2702 #endif
2703
2704         /*
2705          * If the block is full, we can't insert the new entry until we
2706          * make the block un-full.
2707          */
2708         xfs_btree_set_ptr_null(cur, &nptr);
2709         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2710                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
2711                                         &optr, &ptr, &nptr, &ncur, &nrec, stat);
2712                 if (error || *stat == 0)
2713                         goto error0;
2714         }
2715
2716         /*
2717          * The current block may have changed if the block was
2718          * previously full and we have just made space in it.
2719          */
2720         block = xfs_btree_get_block(cur, level, &bp);
2721         numrecs = xfs_btree_get_numrecs(block);
2722
2723 #ifdef DEBUG
2724         error = xfs_btree_check_block(cur, block, level, bp);
2725         if (error)
2726                 return error;
2727 #endif
2728
2729         /*
2730          * At this point we know there's room for our new entry in the block
2731          * we're pointing at.
2732          */
2733         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2734
2735         if (level > 0) {
2736                 /* It's a nonleaf. make a hole in the keys and ptrs */
2737                 union xfs_btree_key     *kp;
2738                 union xfs_btree_ptr     *pp;
2739
2740                 kp = xfs_btree_key_addr(cur, ptr, block);
2741                 pp = xfs_btree_ptr_addr(cur, ptr, block);
2742
2743 #ifdef DEBUG
2744                 for (i = numrecs - ptr; i >= 0; i--) {
2745                         error = xfs_btree_check_ptr(cur, pp, i, level);
2746                         if (error)
2747                                 return error;
2748                 }
2749 #endif
2750
2751                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2752                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2753
2754 #ifdef DEBUG
2755                 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2756                 if (error)
2757                         goto error0;
2758 #endif
2759
2760                 /* Now put the new data in, bump numrecs and log it. */
2761                 xfs_btree_copy_keys(cur, kp, &key, 1);
2762                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2763                 numrecs++;
2764                 xfs_btree_set_numrecs(block, numrecs);
2765                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2766                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
2767 #ifdef DEBUG
2768                 if (ptr < numrecs) {
2769                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2770                                 xfs_btree_key_addr(cur, ptr + 1, block)));
2771                 }
2772 #endif
2773         } else {
2774                 /* It's a leaf. make a hole in the records */
2775                 union xfs_btree_rec             *rp;
2776
2777                 rp = xfs_btree_rec_addr(cur, ptr, block);
2778
2779                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2780
2781                 /* Now put the new data in, bump numrecs and log it. */
2782                 xfs_btree_copy_recs(cur, rp, recp, 1);
2783                 xfs_btree_set_numrecs(block, ++numrecs);
2784                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
2785 #ifdef DEBUG
2786                 if (ptr < numrecs) {
2787                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2788                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
2789                 }
2790 #endif
2791         }
2792
2793         /* Log the new number of records in the btree header. */
2794         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2795
2796         /* If we inserted at the start of a block, update the parents' keys. */
2797         if (optr == 1) {
2798                 error = xfs_btree_updkey(cur, &key, level + 1);
2799                 if (error)
2800                         goto error0;
2801         }
2802
2803         /*
2804          * If we are tracking the last record in the tree and
2805          * we are at the far right edge of the tree, update it.
2806          */
2807         if (xfs_btree_is_lastrec(cur, block, level)) {
2808                 cur->bc_ops->update_lastrec(cur, block, recp,
2809                                             ptr, LASTREC_INSREC);
2810         }
2811
2812         /*
2813          * Return the new block number, if any.
2814          * If there is one, give back a record value and a cursor too.
2815          */
2816         *ptrp = nptr;
2817         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
2818                 *recp = nrec;
2819                 *curp = ncur;
2820         }
2821
2822         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2823         *stat = 1;
2824         return 0;
2825
2826 error0:
2827         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2828         return error;
2829 }
2830
2831 /*
2832  * Insert the record at the point referenced by cur.
2833  *
2834  * A multi-level split of the tree on insert will invalidate the original
2835  * cursor.  All callers of this function should assume that the cursor is
2836  * no longer valid and revalidate it.
2837  */
2838 int
2839 xfs_btree_insert(
2840         struct xfs_btree_cur    *cur,
2841         int                     *stat)
2842 {
2843         int                     error;  /* error return value */
2844         int                     i;      /* result value, 0 for failure */
2845         int                     level;  /* current level number in btree */
2846         union xfs_btree_ptr     nptr;   /* new block number (split result) */
2847         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
2848         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
2849         union xfs_btree_rec     rec;    /* record to insert */
2850
2851         level = 0;
2852         ncur = NULL;
2853         pcur = cur;
2854
2855         xfs_btree_set_ptr_null(cur, &nptr);
2856         cur->bc_ops->init_rec_from_cur(cur, &rec);
2857
2858         /*
2859          * Loop going up the tree, starting at the leaf level.
2860          * Stop when we don't get a split block, that must mean that
2861          * the insert is finished with this level.
2862          */
2863         do {
2864                 /*
2865                  * Insert nrec/nptr into this level of the tree.
2866                  * Note if we fail, nptr will be null.
2867                  */
2868                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
2869                 if (error) {
2870                         if (pcur != cur)
2871                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2872                         goto error0;
2873                 }
2874
2875                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2876                 level++;
2877
2878                 /*
2879                  * See if the cursor we just used is trash.
2880                  * Can't trash the caller's cursor, but otherwise we should
2881                  * if ncur is a new cursor or we're about to be done.
2882                  */
2883                 if (pcur != cur &&
2884                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
2885                         /* Save the state from the cursor before we trash it */
2886                         if (cur->bc_ops->update_cursor)
2887                                 cur->bc_ops->update_cursor(pcur, cur);
2888                         cur->bc_nlevels = pcur->bc_nlevels;
2889                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2890                 }
2891                 /* If we got a new cursor, switch to it. */
2892                 if (ncur) {
2893                         pcur = ncur;
2894                         ncur = NULL;
2895                 }
2896         } while (!xfs_btree_ptr_is_null(cur, &nptr));
2897
2898         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2899         *stat = i;
2900         return 0;
2901 error0:
2902         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2903         return error;
2904 }
2905
2906 /*
2907  * Try to merge a non-leaf block back into the inode root.
2908  *
2909  * Note: the killroot names comes from the fact that we're effectively
2910  * killing the old root block.  But because we can't just delete the
2911  * inode we have to copy the single block it was pointing to into the
2912  * inode.
2913  */
2914 STATIC int
2915 xfs_btree_kill_iroot(
2916         struct xfs_btree_cur    *cur)
2917 {
2918         int                     whichfork = cur->bc_private.b.whichfork;
2919         struct xfs_inode        *ip = cur->bc_private.b.ip;
2920         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
2921         struct xfs_btree_block  *block;
2922         struct xfs_btree_block  *cblock;
2923         union xfs_btree_key     *kp;
2924         union xfs_btree_key     *ckp;
2925         union xfs_btree_ptr     *pp;
2926         union xfs_btree_ptr     *cpp;
2927         struct xfs_buf          *cbp;
2928         int                     level;
2929         int                     index;
2930         int                     numrecs;
2931 #ifdef DEBUG
2932         union xfs_btree_ptr     ptr;
2933         int                     i;
2934 #endif
2935
2936         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2937
2938         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2939         ASSERT(cur->bc_nlevels > 1);
2940
2941         /*
2942          * Don't deal with the root block needs to be a leaf case.
2943          * We're just going to turn the thing back into extents anyway.
2944          */
2945         level = cur->bc_nlevels - 1;
2946         if (level == 1)
2947                 goto out0;
2948
2949         /*
2950          * Give up if the root has multiple children.
2951          */
2952         block = xfs_btree_get_iroot(cur);
2953         if (xfs_btree_get_numrecs(block) != 1)
2954                 goto out0;
2955
2956         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
2957         numrecs = xfs_btree_get_numrecs(cblock);
2958
2959         /*
2960          * Only do this if the next level will fit.
2961          * Then the data must be copied up to the inode,
2962          * instead of freeing the root you free the next level.
2963          */
2964         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
2965                 goto out0;
2966
2967         XFS_BTREE_STATS_INC(cur, killroot);
2968
2969 #ifdef DEBUG
2970         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
2971         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
2972         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
2973         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
2974 #endif
2975
2976         index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
2977         if (index) {
2978                 xfs_iroot_realloc(cur->bc_private.b.ip, index,
2979                                   cur->bc_private.b.whichfork);
2980                 block = ifp->if_broot;
2981         }
2982
2983         be16_add_cpu(&block->bb_numrecs, index);
2984         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
2985
2986         kp = xfs_btree_key_addr(cur, 1, block);
2987         ckp = xfs_btree_key_addr(cur, 1, cblock);
2988         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
2989
2990         pp = xfs_btree_ptr_addr(cur, 1, block);
2991         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2992 #ifdef DEBUG
2993         for (i = 0; i < numrecs; i++) {
2994                 int             error;
2995
2996                 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
2997                 if (error) {
2998                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2999                         return error;
3000                 }
3001         }
3002 #endif
3003         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3004
3005         cur->bc_ops->free_block(cur, cbp);
3006         XFS_BTREE_STATS_INC(cur, free);
3007
3008         cur->bc_bufs[level - 1] = NULL;
3009         be16_add_cpu(&block->bb_level, -1);
3010         xfs_trans_log_inode(cur->bc_tp, ip,
3011                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
3012         cur->bc_nlevels--;
3013 out0:
3014         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3015         return 0;
3016 }
3017
3018 STATIC int
3019 xfs_btree_dec_cursor(
3020         struct xfs_btree_cur    *cur,
3021         int                     level,
3022         int                     *stat)
3023 {
3024         int                     error;
3025         int                     i;
3026
3027         if (level > 0) {
3028                 error = xfs_btree_decrement(cur, level, &i);
3029                 if (error)
3030                         return error;
3031         }
3032
3033         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3034         *stat = 1;
3035         return 0;
3036 }
3037
3038 /*
3039  * Single level of the btree record deletion routine.
3040  * Delete record pointed to by cur/level.
3041  * Remove the record from its block then rebalance the tree.
3042  * Return 0 for error, 1 for done, 2 to go on to the next level.
3043  */
3044 STATIC int                                      /* error */
3045 xfs_btree_delrec(
3046         struct xfs_btree_cur    *cur,           /* btree cursor */
3047         int                     level,          /* level removing record from */
3048         int                     *stat)          /* fail/done/go-on */
3049 {
3050         struct xfs_btree_block  *block;         /* btree block */
3051         union xfs_btree_ptr     cptr;           /* current block ptr */
3052         struct xfs_buf          *bp;            /* buffer for block */
3053         int                     error;          /* error return value */
3054         int                     i;              /* loop counter */
3055         union xfs_btree_key     key;            /* storage for keyp */
3056         union xfs_btree_key     *keyp = &key;   /* passed to the next level */
3057         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3058         struct xfs_buf          *lbp;           /* left buffer pointer */
3059         struct xfs_btree_block  *left;          /* left btree block */
3060         int                     lrecs = 0;      /* left record count */
3061         int                     ptr;            /* key/record index */
3062         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3063         struct xfs_buf          *rbp;           /* right buffer pointer */
3064         struct xfs_btree_block  *right;         /* right btree block */
3065         struct xfs_btree_block  *rrblock;       /* right-right btree block */
3066         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3067         int                     rrecs = 0;      /* right record count */
3068         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3069         int                     numrecs;        /* temporary numrec count */
3070
3071         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3072         XFS_BTREE_TRACE_ARGI(cur, level);
3073
3074         tcur = NULL;
3075
3076         /* Get the index of the entry being deleted, check for nothing there. */
3077         ptr = cur->bc_ptrs[level];
3078         if (ptr == 0) {
3079                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3080                 *stat = 0;
3081                 return 0;
3082         }
3083
3084         /* Get the buffer & block containing the record or key/ptr. */
3085         block = xfs_btree_get_block(cur, level, &bp);
3086         numrecs = xfs_btree_get_numrecs(block);
3087
3088 #ifdef DEBUG
3089         error = xfs_btree_check_block(cur, block, level, bp);
3090         if (error)
3091                 goto error0;
3092 #endif
3093
3094         /* Fail if we're off the end of the block. */
3095         if (ptr > numrecs) {
3096                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3097                 *stat = 0;
3098                 return 0;
3099         }
3100
3101         XFS_BTREE_STATS_INC(cur, delrec);
3102         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3103
3104         /* Excise the entries being deleted. */
3105         if (level > 0) {
3106                 /* It's a nonleaf. operate on keys and ptrs */
3107                 union xfs_btree_key     *lkp;
3108                 union xfs_btree_ptr     *lpp;
3109
3110                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3111                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3112
3113 #ifdef DEBUG
3114                 for (i = 0; i < numrecs - ptr; i++) {
3115                         error = xfs_btree_check_ptr(cur, lpp, i, level);
3116                         if (error)
3117                                 goto error0;
3118                 }
3119 #endif
3120
3121                 if (ptr < numrecs) {
3122                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3123                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3124                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3125                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3126                 }
3127
3128                 /*
3129                  * If it's the first record in the block, we'll need to pass a
3130                  * key up to the next level (updkey).
3131                  */
3132                 if (ptr == 1)
3133                         keyp = xfs_btree_key_addr(cur, 1, block);
3134         } else {
3135                 /* It's a leaf. operate on records */
3136                 if (ptr < numrecs) {
3137                         xfs_btree_shift_recs(cur,
3138                                 xfs_btree_rec_addr(cur, ptr + 1, block),
3139                                 -1, numrecs - ptr);
3140                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3141                 }
3142
3143                 /*
3144                  * If it's the first record in the block, we'll need a key
3145                  * structure to pass up to the next level (updkey).
3146                  */
3147                 if (ptr == 1) {
3148                         cur->bc_ops->init_key_from_rec(&key,
3149                                         xfs_btree_rec_addr(cur, 1, block));
3150                         keyp = &key;
3151                 }
3152         }
3153
3154         /*
3155          * Decrement and log the number of entries in the block.
3156          */
3157         xfs_btree_set_numrecs(block, --numrecs);
3158         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3159
3160         /*
3161          * If we are tracking the last record in the tree and
3162          * we are at the far right edge of the tree, update it.
3163          */
3164         if (xfs_btree_is_lastrec(cur, block, level)) {
3165                 cur->bc_ops->update_lastrec(cur, block, NULL,
3166                                             ptr, LASTREC_DELREC);
3167         }
3168
3169         /*
3170          * We're at the root level.  First, shrink the root block in-memory.
3171          * Try to get rid of the next level down.  If we can't then there's
3172          * nothing left to do.
3173          */
3174         if (level == cur->bc_nlevels - 1) {
3175                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3176                         xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3177                                           cur->bc_private.b.whichfork);
3178
3179                         error = xfs_btree_kill_iroot(cur);
3180                         if (error)
3181                                 goto error0;
3182
3183                         error = xfs_btree_dec_cursor(cur, level, stat);
3184                         if (error)
3185                                 goto error0;
3186                         *stat = 1;
3187                         return 0;
3188                 }
3189
3190                 /*
3191                  * If this is the root level, and there's only one entry left,
3192                  * and it's NOT the leaf level, then we can get rid of this
3193                  * level.
3194                  */
3195                 if (numrecs == 1 && level > 0) {
3196                         union xfs_btree_ptr     *pp;
3197                         /*
3198                          * pp is still set to the first pointer in the block.
3199                          * Make it the new root of the btree.
3200                          */
3201                         pp = xfs_btree_ptr_addr(cur, 1, block);
3202                         error = cur->bc_ops->kill_root(cur, bp, level, pp);
3203                         if (error)
3204                                 goto error0;
3205                 } else if (level > 0) {
3206                         error = xfs_btree_dec_cursor(cur, level, stat);
3207                         if (error)
3208                                 goto error0;
3209                 }
3210                 *stat = 1;
3211                 return 0;
3212         }
3213
3214         /*
3215          * If we deleted the leftmost entry in the block, update the
3216          * key values above us in the tree.
3217          */
3218         if (ptr == 1) {
3219                 error = xfs_btree_updkey(cur, keyp, level + 1);
3220                 if (error)
3221                         goto error0;
3222         }
3223
3224         /*
3225          * If the number of records remaining in the block is at least
3226          * the minimum, we're done.
3227          */
3228         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3229                 error = xfs_btree_dec_cursor(cur, level, stat);
3230                 if (error)
3231                         goto error0;
3232                 return 0;
3233         }
3234
3235         /*
3236          * Otherwise, we have to move some records around to keep the
3237          * tree balanced.  Look at the left and right sibling blocks to
3238          * see if we can re-balance by moving only one record.
3239          */
3240         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3241         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3242
3243         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3244                 /*
3245                  * One child of root, need to get a chance to copy its contents
3246                  * into the root and delete it. Can't go up to next level,
3247                  * there's nothing to delete there.
3248                  */
3249                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3250                     xfs_btree_ptr_is_null(cur, &lptr) &&
3251                     level == cur->bc_nlevels - 2) {
3252                         error = xfs_btree_kill_iroot(cur);
3253                         if (!error)
3254                                 error = xfs_btree_dec_cursor(cur, level, stat);
3255                         if (error)
3256                                 goto error0;
3257                         return 0;
3258                 }
3259         }
3260
3261         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3262                !xfs_btree_ptr_is_null(cur, &lptr));
3263
3264         /*
3265          * Duplicate the cursor so our btree manipulations here won't
3266          * disrupt the next level up.
3267          */
3268         error = xfs_btree_dup_cursor(cur, &tcur);
3269         if (error)
3270                 goto error0;
3271
3272         /*
3273          * If there's a right sibling, see if it's ok to shift an entry
3274          * out of it.
3275          */
3276         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3277                 /*
3278                  * Move the temp cursor to the last entry in the next block.
3279                  * Actually any entry but the first would suffice.
3280                  */
3281                 i = xfs_btree_lastrec(tcur, level);
3282                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3283
3284                 error = xfs_btree_increment(tcur, level, &i);
3285                 if (error)
3286                         goto error0;
3287                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3288
3289                 i = xfs_btree_lastrec(tcur, level);
3290                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3291
3292                 /* Grab a pointer to the block. */
3293                 right = xfs_btree_get_block(tcur, level, &rbp);
3294 #ifdef DEBUG
3295                 error = xfs_btree_check_block(tcur, right, level, rbp);
3296                 if (error)
3297                         goto error0;
3298 #endif
3299                 /* Grab the current block number, for future use. */
3300                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3301
3302                 /*
3303                  * If right block is full enough so that removing one entry
3304                  * won't make it too empty, and left-shifting an entry out
3305                  * of right to us works, we're done.
3306                  */
3307                 if (xfs_btree_get_numrecs(right) - 1 >=
3308                     cur->bc_ops->get_minrecs(tcur, level)) {
3309                         error = xfs_btree_lshift(tcur, level, &i);
3310                         if (error)
3311                                 goto error0;
3312                         if (i) {
3313                                 ASSERT(xfs_btree_get_numrecs(block) >=
3314                                        cur->bc_ops->get_minrecs(tcur, level));
3315
3316                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3317                                 tcur = NULL;
3318
3319                                 error = xfs_btree_dec_cursor(cur, level, stat);
3320                                 if (error)
3321                                         goto error0;
3322                                 return 0;
3323                         }
3324                 }
3325
3326                 /*
3327                  * Otherwise, grab the number of records in right for
3328                  * future reference, and fix up the temp cursor to point
3329                  * to our block again (last record).
3330                  */
3331                 rrecs = xfs_btree_get_numrecs(right);
3332                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3333                         i = xfs_btree_firstrec(tcur, level);
3334                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3335
3336                         error = xfs_btree_decrement(tcur, level, &i);
3337                         if (error)
3338                                 goto error0;
3339                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3340                 }
3341         }
3342
3343         /*
3344          * If there's a left sibling, see if it's ok to shift an entry
3345          * out of it.
3346          */
3347         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3348                 /*
3349                  * Move the temp cursor to the first entry in the
3350                  * previous block.
3351                  */
3352                 i = xfs_btree_firstrec(tcur, level);
3353                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3354
3355                 error = xfs_btree_decrement(tcur, level, &i);
3356                 if (error)
3357                         goto error0;
3358                 i = xfs_btree_firstrec(tcur, level);
3359                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3360
3361                 /* Grab a pointer to the block. */
3362                 left = xfs_btree_get_block(tcur, level, &lbp);
3363 #ifdef DEBUG
3364                 error = xfs_btree_check_block(cur, left, level, lbp);
3365                 if (error)
3366                         goto error0;
3367 #endif
3368                 /* Grab the current block number, for future use. */
3369                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3370
3371                 /*
3372                  * If left block is full enough so that removing one entry
3373                  * won't make it too empty, and right-shifting an entry out
3374                  * of left to us works, we're done.
3375                  */
3376                 if (xfs_btree_get_numrecs(left) - 1 >=
3377                     cur->bc_ops->get_minrecs(tcur, level)) {
3378                         error = xfs_btree_rshift(tcur, level, &i);
3379                         if (error)
3380                                 goto error0;
3381                         if (i) {
3382                                 ASSERT(xfs_btree_get_numrecs(block) >=
3383                                        cur->bc_ops->get_minrecs(tcur, level));
3384                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3385                                 tcur = NULL;
3386                                 if (level == 0)
3387                                         cur->bc_ptrs[0]++;
3388                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3389                                 *stat = 1;
3390                                 return 0;
3391                         }
3392                 }
3393
3394                 /*
3395                  * Otherwise, grab the number of records in right for
3396                  * future reference.
3397                  */
3398                 lrecs = xfs_btree_get_numrecs(left);
3399         }
3400
3401         /* Delete the temp cursor, we're done with it. */
3402         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3403         tcur = NULL;
3404
3405         /* If here, we need to do a join to keep the tree balanced. */
3406         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3407
3408         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3409             lrecs + xfs_btree_get_numrecs(block) <=
3410                         cur->bc_ops->get_maxrecs(cur, level)) {
3411                 /*
3412                  * Set "right" to be the starting block,
3413                  * "left" to be the left neighbor.
3414                  */
3415                 rptr = cptr;
3416                 right = block;
3417                 rbp = bp;
3418                 error = xfs_btree_read_buf_block(cur, &lptr, level,
3419                                                         0, &left, &lbp);
3420                 if (error)
3421                         goto error0;
3422
3423         /*
3424          * If that won't work, see if we can join with the right neighbor block.
3425          */
3426         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3427                    rrecs + xfs_btree_get_numrecs(block) <=
3428                         cur->bc_ops->get_maxrecs(cur, level)) {
3429                 /*
3430                  * Set "left" to be the starting block,
3431                  * "right" to be the right neighbor.
3432                  */
3433                 lptr = cptr;
3434                 left = block;
3435                 lbp = bp;
3436                 error = xfs_btree_read_buf_block(cur, &rptr, level,
3437                                                         0, &right, &rbp);
3438                 if (error)
3439                         goto error0;
3440
3441         /*
3442          * Otherwise, we can't fix the imbalance.
3443          * Just return.  This is probably a logic error, but it's not fatal.
3444          */
3445         } else {
3446                 error = xfs_btree_dec_cursor(cur, level, stat);
3447                 if (error)
3448                         goto error0;
3449                 return 0;
3450         }
3451
3452         rrecs = xfs_btree_get_numrecs(right);
3453         lrecs = xfs_btree_get_numrecs(left);
3454
3455         /*
3456          * We're now going to join "left" and "right" by moving all the stuff
3457          * in "right" to "left" and deleting "right".
3458          */
3459         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3460         if (level > 0) {
3461                 /* It's a non-leaf.  Move keys and pointers. */
3462                 union xfs_btree_key     *lkp;   /* left btree key */
3463                 union xfs_btree_ptr     *lpp;   /* left address pointer */
3464                 union xfs_btree_key     *rkp;   /* right btree key */
3465                 union xfs_btree_ptr     *rpp;   /* right address pointer */
3466
3467                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3468                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3469                 rkp = xfs_btree_key_addr(cur, 1, right);
3470                 rpp = xfs_btree_ptr_addr(cur, 1, right);
3471 #ifdef DEBUG
3472                 for (i = 1; i < rrecs; i++) {
3473                         error = xfs_btree_check_ptr(cur, rpp, i, level);
3474                         if (error)
3475                                 goto error0;
3476                 }
3477 #endif
3478                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3479                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3480
3481                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3482                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3483         } else {
3484                 /* It's a leaf.  Move records.  */
3485                 union xfs_btree_rec     *lrp;   /* left record pointer */
3486                 union xfs_btree_rec     *rrp;   /* right record pointer */
3487
3488                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3489                 rrp = xfs_btree_rec_addr(cur, 1, right);
3490
3491                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3492                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3493         }
3494
3495         XFS_BTREE_STATS_INC(cur, join);
3496
3497         /*
3498          * Fix up the number of records and right block pointer in the
3499          * surviving block, and log it.
3500          */
3501         xfs_btree_set_numrecs(left, lrecs + rrecs);
3502         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3503         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3504         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3505
3506         /* If there is a right sibling, point it to the remaining block. */
3507         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3508         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3509                 error = xfs_btree_read_buf_block(cur, &cptr, level,
3510                                                         0, &rrblock, &rrbp);
3511                 if (error)
3512                         goto error0;
3513                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3514                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3515         }
3516
3517         /* Free the deleted block. */
3518         error = cur->bc_ops->free_block(cur, rbp);
3519         if (error)
3520                 goto error0;
3521         XFS_BTREE_STATS_INC(cur, free);
3522
3523         /*
3524          * If we joined with the left neighbor, set the buffer in the
3525          * cursor to the left block, and fix up the index.
3526          */
3527         if (bp != lbp) {
3528                 cur->bc_bufs[level] = lbp;
3529                 cur->bc_ptrs[level] += lrecs;
3530                 cur->bc_ra[level] = 0;
3531         }
3532         /*
3533          * If we joined with the right neighbor and there's a level above
3534          * us, increment the cursor at that level.
3535          */
3536         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3537                    (level + 1 < cur->bc_nlevels)) {
3538                 error = xfs_btree_increment(cur, level + 1, &i);
3539                 if (error)
3540                         goto error0;
3541         }
3542
3543         /*
3544          * Readjust the ptr at this level if it's not a leaf, since it's
3545          * still pointing at the deletion point, which makes the cursor
3546          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
3547          * We can't use decrement because it would change the next level up.
3548          */
3549         if (level > 0)
3550                 cur->bc_ptrs[level]--;
3551
3552         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3553         /* Return value means the next level up has something to do. */
3554         *stat = 2;
3555         return 0;
3556
3557 error0:
3558         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3559         if (tcur)
3560                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3561         return error;
3562 }
3563
3564 /*
3565  * Delete the record pointed to by cur.
3566  * The cursor refers to the place where the record was (could be inserted)
3567  * when the operation returns.
3568  */
3569 int                                     /* error */
3570 xfs_btree_delete(
3571         struct xfs_btree_cur    *cur,
3572         int                     *stat)  /* success/failure */
3573 {
3574         int                     error;  /* error return value */
3575         int                     level;
3576         int                     i;
3577
3578         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3579
3580         /*
3581          * Go up the tree, starting at leaf level.
3582          *
3583          * If 2 is returned then a join was done; go to the next level.
3584          * Otherwise we are done.
3585          */
3586         for (level = 0, i = 2; i == 2; level++) {
3587                 error = xfs_btree_delrec(cur, level, &i);
3588                 if (error)
3589                         goto error0;
3590         }
3591
3592         if (i == 0) {
3593                 for (level = 1; level < cur->bc_nlevels; level++) {
3594                         if (cur->bc_ptrs[level] == 0) {
3595                                 error = xfs_btree_decrement(cur, level, &i);
3596                                 if (error)
3597                                         goto error0;
3598                                 break;
3599                         }
3600                 }
3601         }
3602
3603         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3604         *stat = i;
3605         return 0;
3606 error0:
3607         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3608         return error;
3609 }
3610
3611 /*
3612  * Get the data from the pointed-to record.
3613  */
3614 int                                     /* error */
3615 xfs_btree_get_rec(
3616         struct xfs_btree_cur    *cur,   /* btree cursor */
3617         union xfs_btree_rec     **recp, /* output: btree record */
3618         int                     *stat)  /* output: success/failure */
3619 {
3620         struct xfs_btree_block  *block; /* btree block */
3621         struct xfs_buf          *bp;    /* buffer pointer */
3622         int                     ptr;    /* record number */
3623 #ifdef DEBUG
3624         int                     error;  /* error return value */
3625 #endif
3626
3627         ptr = cur->bc_ptrs[0];
3628         block = xfs_btree_get_block(cur, 0, &bp);
3629
3630 #ifdef DEBUG
3631         error = xfs_btree_check_block(cur, block, 0, bp);
3632         if (error)
3633                 return error;
3634 #endif
3635
3636         /*
3637          * Off the right end or left end, return failure.
3638          */
3639         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3640                 *stat = 0;
3641                 return 0;
3642         }
3643
3644         /*
3645          * Point to the record and extract its data.
3646          */
3647         *recp = xfs_btree_rec_addr(cur, ptr, block);
3648         *stat = 1;
3649         return 0;
3650 }