[XFS] implement generic xfs_btree_updkey
[safe/jmp/linux-2.6] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_inode_item.h"
38 #include "xfs_btree.h"
39 #include "xfs_btree_trace.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_error.h"
42
43 /*
44  * Cursor allocation zone.
45  */
46 kmem_zone_t     *xfs_btree_cur_zone;
47
48 /*
49  * Btree magic numbers.
50  */
51 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
52         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
53 };
54
55 /*
56  * External routines.
57  */
58
59 #ifdef DEBUG
60 /*
61  * Debug routine: check that keys are in the right order.
62  */
63 void
64 xfs_btree_check_key(
65         xfs_btnum_t     btnum,          /* btree identifier */
66         void            *ak1,           /* pointer to left (lower) key */
67         void            *ak2)           /* pointer to right (higher) key */
68 {
69         switch (btnum) {
70         case XFS_BTNUM_BNO: {
71                 xfs_alloc_key_t *k1;
72                 xfs_alloc_key_t *k2;
73
74                 k1 = ak1;
75                 k2 = ak2;
76                 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
77                 break;
78             }
79         case XFS_BTNUM_CNT: {
80                 xfs_alloc_key_t *k1;
81                 xfs_alloc_key_t *k2;
82
83                 k1 = ak1;
84                 k2 = ak2;
85                 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
86                        (k1->ar_blockcount == k2->ar_blockcount &&
87                         be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
88                 break;
89             }
90         case XFS_BTNUM_BMAP: {
91                 xfs_bmbt_key_t  *k1;
92                 xfs_bmbt_key_t  *k2;
93
94                 k1 = ak1;
95                 k2 = ak2;
96                 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
97                 break;
98             }
99         case XFS_BTNUM_INO: {
100                 xfs_inobt_key_t *k1;
101                 xfs_inobt_key_t *k2;
102
103                 k1 = ak1;
104                 k2 = ak2;
105                 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
106                 break;
107             }
108         default:
109                 ASSERT(0);
110         }
111 }
112
113 /*
114  * Debug routine: check that records are in the right order.
115  */
116 void
117 xfs_btree_check_rec(
118         xfs_btnum_t     btnum,          /* btree identifier */
119         void            *ar1,           /* pointer to left (lower) record */
120         void            *ar2)           /* pointer to right (higher) record */
121 {
122         switch (btnum) {
123         case XFS_BTNUM_BNO: {
124                 xfs_alloc_rec_t *r1;
125                 xfs_alloc_rec_t *r2;
126
127                 r1 = ar1;
128                 r2 = ar2;
129                 ASSERT(be32_to_cpu(r1->ar_startblock) +
130                        be32_to_cpu(r1->ar_blockcount) <=
131                        be32_to_cpu(r2->ar_startblock));
132                 break;
133             }
134         case XFS_BTNUM_CNT: {
135                 xfs_alloc_rec_t *r1;
136                 xfs_alloc_rec_t *r2;
137
138                 r1 = ar1;
139                 r2 = ar2;
140                 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
141                        (r1->ar_blockcount == r2->ar_blockcount &&
142                         be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
143                 break;
144             }
145         case XFS_BTNUM_BMAP: {
146                 xfs_bmbt_rec_t  *r1;
147                 xfs_bmbt_rec_t  *r2;
148
149                 r1 = ar1;
150                 r2 = ar2;
151                 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
152                        xfs_bmbt_disk_get_blockcount(r1) <=
153                        xfs_bmbt_disk_get_startoff(r2));
154                 break;
155             }
156         case XFS_BTNUM_INO: {
157                 xfs_inobt_rec_t *r1;
158                 xfs_inobt_rec_t *r2;
159
160                 r1 = ar1;
161                 r2 = ar2;
162                 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
163                        be32_to_cpu(r2->ir_startino));
164                 break;
165             }
166         default:
167                 ASSERT(0);
168         }
169 }
170 #endif  /* DEBUG */
171
172 int                                     /* error (0 or EFSCORRUPTED) */
173 xfs_btree_check_lblock(
174         struct xfs_btree_cur    *cur,   /* btree cursor */
175         struct xfs_btree_lblock *block, /* btree long form block pointer */
176         int                     level,  /* level of the btree block */
177         struct xfs_buf          *bp)    /* buffer for block, if any */
178 {
179         int                     lblock_ok; /* block passes checks */
180         struct xfs_mount        *mp;    /* file system mount point */
181
182         mp = cur->bc_mp;
183         lblock_ok =
184                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
185                 be16_to_cpu(block->bb_level) == level &&
186                 be16_to_cpu(block->bb_numrecs) <=
187                         cur->bc_ops->get_maxrecs(cur, level) &&
188                 block->bb_leftsib &&
189                 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
190                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
191                 block->bb_rightsib &&
192                 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
193                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
194         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
195                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
196                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
197                 if (bp)
198                         xfs_buftrace("LBTREE ERROR", bp);
199                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
200                                  mp);
201                 return XFS_ERROR(EFSCORRUPTED);
202         }
203         return 0;
204 }
205
206 int                                     /* error (0 or EFSCORRUPTED) */
207 xfs_btree_check_sblock(
208         struct xfs_btree_cur    *cur,   /* btree cursor */
209         struct xfs_btree_sblock *block, /* btree short form block pointer */
210         int                     level,  /* level of the btree block */
211         struct xfs_buf          *bp)    /* buffer containing block */
212 {
213         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
214         struct xfs_agf          *agf;   /* ag. freespace structure */
215         xfs_agblock_t           agflen; /* native ag. freespace length */
216         int                     sblock_ok; /* block passes checks */
217
218         agbp = cur->bc_private.a.agbp;
219         agf = XFS_BUF_TO_AGF(agbp);
220         agflen = be32_to_cpu(agf->agf_length);
221         sblock_ok =
222                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
223                 be16_to_cpu(block->bb_level) == level &&
224                 be16_to_cpu(block->bb_numrecs) <=
225                         cur->bc_ops->get_maxrecs(cur, level) &&
226                 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
227                  be32_to_cpu(block->bb_leftsib) < agflen) &&
228                 block->bb_leftsib &&
229                 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
230                  be32_to_cpu(block->bb_rightsib) < agflen) &&
231                 block->bb_rightsib;
232         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
233                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
234                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
235                 if (bp)
236                         xfs_buftrace("SBTREE ERROR", bp);
237                 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
238                                  cur->bc_mp);
239                 return XFS_ERROR(EFSCORRUPTED);
240         }
241         return 0;
242 }
243
244 /*
245  * Debug routine: check that block header is ok.
246  */
247 int
248 xfs_btree_check_block(
249         struct xfs_btree_cur    *cur,   /* btree cursor */
250         struct xfs_btree_block  *block, /* generic btree block pointer */
251         int                     level,  /* level of the btree block */
252         struct xfs_buf          *bp)    /* buffer containing block, if any */
253 {
254         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
255                 return xfs_btree_check_lblock(cur,
256                                 (struct xfs_btree_lblock *)block, level, bp);
257         } else {
258                 return xfs_btree_check_sblock(cur,
259                                 (struct xfs_btree_sblock *)block, level, bp);
260         }
261 }
262
263 /*
264  * Check that (long) pointer is ok.
265  */
266 int                                     /* error (0 or EFSCORRUPTED) */
267 xfs_btree_check_lptr(
268         struct xfs_btree_cur    *cur,   /* btree cursor */
269         xfs_dfsbno_t            bno,    /* btree block disk address */
270         int                     level)  /* btree block level */
271 {
272         XFS_WANT_CORRUPTED_RETURN(
273                 level > 0 &&
274                 bno != NULLDFSBNO &&
275                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
276         return 0;
277 }
278
279 /*
280  * Check that (short) pointer is ok.
281  */
282 int                                     /* error (0 or EFSCORRUPTED) */
283 xfs_btree_check_sptr(
284         struct xfs_btree_cur    *cur,   /* btree cursor */
285         xfs_agblock_t           bno,    /* btree block disk address */
286         int                     level)  /* btree block level */
287 {
288         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
289
290         XFS_WANT_CORRUPTED_RETURN(
291                 level > 0 &&
292                 bno != NULLAGBLOCK &&
293                 bno != 0 &&
294                 bno < agblocks);
295         return 0;
296 }
297
298 /*
299  * Check that block ptr is ok.
300  */
301 int                                     /* error (0 or EFSCORRUPTED) */
302 xfs_btree_check_ptr(
303         struct xfs_btree_cur    *cur,   /* btree cursor */
304         union xfs_btree_ptr     *ptr,   /* btree block disk address */
305         int                     index,  /* offset from ptr to check */
306         int                     level)  /* btree block level */
307 {
308         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
309                 return xfs_btree_check_lptr(cur,
310                                 be64_to_cpu((&ptr->l)[index]), level);
311         } else {
312                 return xfs_btree_check_sptr(cur,
313                                 be32_to_cpu((&ptr->s)[index]), level);
314         }
315 }
316
317 /*
318  * Delete the btree cursor.
319  */
320 void
321 xfs_btree_del_cursor(
322         xfs_btree_cur_t *cur,           /* btree cursor */
323         int             error)          /* del because of error */
324 {
325         int             i;              /* btree level */
326
327         /*
328          * Clear the buffer pointers, and release the buffers.
329          * If we're doing this in the face of an error, we
330          * need to make sure to inspect all of the entries
331          * in the bc_bufs array for buffers to be unlocked.
332          * This is because some of the btree code works from
333          * level n down to 0, and if we get an error along
334          * the way we won't have initialized all the entries
335          * down to 0.
336          */
337         for (i = 0; i < cur->bc_nlevels; i++) {
338                 if (cur->bc_bufs[i])
339                         xfs_btree_setbuf(cur, i, NULL);
340                 else if (!error)
341                         break;
342         }
343         /*
344          * Can't free a bmap cursor without having dealt with the
345          * allocated indirect blocks' accounting.
346          */
347         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
348                cur->bc_private.b.allocated == 0);
349         /*
350          * Free the cursor.
351          */
352         kmem_zone_free(xfs_btree_cur_zone, cur);
353 }
354
355 /*
356  * Duplicate the btree cursor.
357  * Allocate a new one, copy the record, re-get the buffers.
358  */
359 int                                     /* error */
360 xfs_btree_dup_cursor(
361         xfs_btree_cur_t *cur,           /* input cursor */
362         xfs_btree_cur_t **ncur)         /* output cursor */
363 {
364         xfs_buf_t       *bp;            /* btree block's buffer pointer */
365         int             error;          /* error return value */
366         int             i;              /* level number of btree block */
367         xfs_mount_t     *mp;            /* mount structure for filesystem */
368         xfs_btree_cur_t *new;           /* new cursor value */
369         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
370
371         tp = cur->bc_tp;
372         mp = cur->bc_mp;
373
374         /*
375          * Allocate a new cursor like the old one.
376          */
377         new = cur->bc_ops->dup_cursor(cur);
378
379         /*
380          * Copy the record currently in the cursor.
381          */
382         new->bc_rec = cur->bc_rec;
383
384         /*
385          * For each level current, re-get the buffer and copy the ptr value.
386          */
387         for (i = 0; i < new->bc_nlevels; i++) {
388                 new->bc_ptrs[i] = cur->bc_ptrs[i];
389                 new->bc_ra[i] = cur->bc_ra[i];
390                 if ((bp = cur->bc_bufs[i])) {
391                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
392                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
393                                 xfs_btree_del_cursor(new, error);
394                                 *ncur = NULL;
395                                 return error;
396                         }
397                         new->bc_bufs[i] = bp;
398                         ASSERT(bp);
399                         ASSERT(!XFS_BUF_GETERROR(bp));
400                 } else
401                         new->bc_bufs[i] = NULL;
402         }
403         *ncur = new;
404         return 0;
405 }
406
407 /*
408  * XFS btree block layout and addressing:
409  *
410  * There are two types of blocks in the btree: leaf and non-leaf blocks.
411  *
412  * The leaf record start with a header then followed by records containing
413  * the values.  A non-leaf block also starts with the same header, and
414  * then first contains lookup keys followed by an equal number of pointers
415  * to the btree blocks at the previous level.
416  *
417  *              +--------+-------+-------+-------+-------+-------+-------+
418  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
419  *              +--------+-------+-------+-------+-------+-------+-------+
420  *
421  *              +--------+-------+-------+-------+-------+-------+-------+
422  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
423  *              +--------+-------+-------+-------+-------+-------+-------+
424  *
425  * The header is called struct xfs_btree_block for reasons better left unknown
426  * and comes in different versions for short (32bit) and long (64bit) block
427  * pointers.  The record and key structures are defined by the btree instances
428  * and opaque to the btree core.  The block pointers are simple disk endian
429  * integers, available in a short (32bit) and long (64bit) variant.
430  *
431  * The helpers below calculate the offset of a given record, key or pointer
432  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
433  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
434  * inside the btree block is done using indices starting at one, not zero!
435  */
436
437 /*
438  * Return size of the btree block header for this btree instance.
439  */
440 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
441 {
442         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
443                 sizeof(struct xfs_btree_lblock) :
444                 sizeof(struct xfs_btree_sblock);
445 }
446
447 /*
448  * Return size of btree block pointers for this btree instance.
449  */
450 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
451 {
452         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
453                 sizeof(__be64) : sizeof(__be32);
454 }
455
456 /*
457  * Calculate offset of the n-th record in a btree block.
458  */
459 STATIC size_t
460 xfs_btree_rec_offset(
461         struct xfs_btree_cur    *cur,
462         int                     n)
463 {
464         return xfs_btree_block_len(cur) +
465                 (n - 1) * cur->bc_ops->rec_len;
466 }
467
468 /*
469  * Calculate offset of the n-th key in a btree block.
470  */
471 STATIC size_t
472 xfs_btree_key_offset(
473         struct xfs_btree_cur    *cur,
474         int                     n)
475 {
476         return xfs_btree_block_len(cur) +
477                 (n - 1) * cur->bc_ops->key_len;
478 }
479
480 /*
481  * Calculate offset of the n-th block pointer in a btree block.
482  */
483 STATIC size_t
484 xfs_btree_ptr_offset(
485         struct xfs_btree_cur    *cur,
486         int                     n,
487         int                     level)
488 {
489         return xfs_btree_block_len(cur) +
490                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
491                 (n - 1) * xfs_btree_ptr_len(cur);
492 }
493
494 /*
495  * Return a pointer to the n-th record in the btree block.
496  */
497 STATIC union xfs_btree_rec *
498 xfs_btree_rec_addr(
499         struct xfs_btree_cur    *cur,
500         int                     n,
501         struct xfs_btree_block  *block)
502 {
503         return (union xfs_btree_rec *)
504                 ((char *)block + xfs_btree_rec_offset(cur, n));
505 }
506
507 /*
508  * Return a pointer to the n-th key in the btree block.
509  */
510 STATIC union xfs_btree_key *
511 xfs_btree_key_addr(
512         struct xfs_btree_cur    *cur,
513         int                     n,
514         struct xfs_btree_block  *block)
515 {
516         return (union xfs_btree_key *)
517                 ((char *)block + xfs_btree_key_offset(cur, n));
518 }
519
520 /*
521  * Return a pointer to the n-th block pointer in the btree block.
522  */
523 STATIC union xfs_btree_ptr *
524 xfs_btree_ptr_addr(
525         struct xfs_btree_cur    *cur,
526         int                     n,
527         struct xfs_btree_block  *block)
528 {
529         int                     level = xfs_btree_get_level(block);
530
531         ASSERT(block->bb_level != 0);
532
533         return (union xfs_btree_ptr *)
534                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
535 }
536
537 /*
538  * Get a the root block which is stored in the inode.
539  *
540  * For now this btree implementation assumes the btree root is always
541  * stored in the if_broot field of an inode fork.
542  */
543 STATIC struct xfs_btree_block *
544 xfs_btree_get_iroot(
545        struct xfs_btree_cur    *cur)
546 {
547        struct xfs_ifork        *ifp;
548
549        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
550        return (struct xfs_btree_block *)ifp->if_broot;
551 }
552
553 /*
554  * Retrieve the block pointer from the cursor at the given level.
555  * This may be an inode btree root or from a buffer.
556  */
557 STATIC struct xfs_btree_block *         /* generic btree block pointer */
558 xfs_btree_get_block(
559         struct xfs_btree_cur    *cur,   /* btree cursor */
560         int                     level,  /* level in btree */
561         struct xfs_buf          **bpp)  /* buffer containing the block */
562 {
563         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
564             (level == cur->bc_nlevels - 1)) {
565                 *bpp = NULL;
566                 return xfs_btree_get_iroot(cur);
567         }
568
569         *bpp = cur->bc_bufs[level];
570         return XFS_BUF_TO_BLOCK(*bpp);
571 }
572
573 /*
574  * Get a buffer for the block, return it with no data read.
575  * Long-form addressing.
576  */
577 xfs_buf_t *                             /* buffer for fsbno */
578 xfs_btree_get_bufl(
579         xfs_mount_t     *mp,            /* file system mount point */
580         xfs_trans_t     *tp,            /* transaction pointer */
581         xfs_fsblock_t   fsbno,          /* file system block number */
582         uint            lock)           /* lock flags for get_buf */
583 {
584         xfs_buf_t       *bp;            /* buffer pointer (return value) */
585         xfs_daddr_t             d;              /* real disk block address */
586
587         ASSERT(fsbno != NULLFSBLOCK);
588         d = XFS_FSB_TO_DADDR(mp, fsbno);
589         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
590         ASSERT(bp);
591         ASSERT(!XFS_BUF_GETERROR(bp));
592         return bp;
593 }
594
595 /*
596  * Get a buffer for the block, return it with no data read.
597  * Short-form addressing.
598  */
599 xfs_buf_t *                             /* buffer for agno/agbno */
600 xfs_btree_get_bufs(
601         xfs_mount_t     *mp,            /* file system mount point */
602         xfs_trans_t     *tp,            /* transaction pointer */
603         xfs_agnumber_t  agno,           /* allocation group number */
604         xfs_agblock_t   agbno,          /* allocation group block number */
605         uint            lock)           /* lock flags for get_buf */
606 {
607         xfs_buf_t       *bp;            /* buffer pointer (return value) */
608         xfs_daddr_t             d;              /* real disk block address */
609
610         ASSERT(agno != NULLAGNUMBER);
611         ASSERT(agbno != NULLAGBLOCK);
612         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
613         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
614         ASSERT(bp);
615         ASSERT(!XFS_BUF_GETERROR(bp));
616         return bp;
617 }
618
619 /*
620  * Check for the cursor referring to the last block at the given level.
621  */
622 int                                     /* 1=is last block, 0=not last block */
623 xfs_btree_islastblock(
624         xfs_btree_cur_t         *cur,   /* btree cursor */
625         int                     level)  /* level to check */
626 {
627         xfs_btree_block_t       *block; /* generic btree block pointer */
628         xfs_buf_t               *bp;    /* buffer containing block */
629
630         block = xfs_btree_get_block(cur, level, &bp);
631         xfs_btree_check_block(cur, block, level, bp);
632         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
633                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
634         else
635                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
636 }
637
638 /*
639  * Change the cursor to point to the first record at the given level.
640  * Other levels are unaffected.
641  */
642 int                                     /* success=1, failure=0 */
643 xfs_btree_firstrec(
644         xfs_btree_cur_t         *cur,   /* btree cursor */
645         int                     level)  /* level to change */
646 {
647         xfs_btree_block_t       *block; /* generic btree block pointer */
648         xfs_buf_t               *bp;    /* buffer containing block */
649
650         /*
651          * Get the block pointer for this level.
652          */
653         block = xfs_btree_get_block(cur, level, &bp);
654         xfs_btree_check_block(cur, block, level, bp);
655         /*
656          * It's empty, there is no such record.
657          */
658         if (!block->bb_numrecs)
659                 return 0;
660         /*
661          * Set the ptr value to 1, that's the first record/key.
662          */
663         cur->bc_ptrs[level] = 1;
664         return 1;
665 }
666
667 /*
668  * Change the cursor to point to the last record in the current block
669  * at the given level.  Other levels are unaffected.
670  */
671 int                                     /* success=1, failure=0 */
672 xfs_btree_lastrec(
673         xfs_btree_cur_t         *cur,   /* btree cursor */
674         int                     level)  /* level to change */
675 {
676         xfs_btree_block_t       *block; /* generic btree block pointer */
677         xfs_buf_t               *bp;    /* buffer containing block */
678
679         /*
680          * Get the block pointer for this level.
681          */
682         block = xfs_btree_get_block(cur, level, &bp);
683         xfs_btree_check_block(cur, block, level, bp);
684         /*
685          * It's empty, there is no such record.
686          */
687         if (!block->bb_numrecs)
688                 return 0;
689         /*
690          * Set the ptr value to numrecs, that's the last record/key.
691          */
692         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
693         return 1;
694 }
695
696 /*
697  * Compute first and last byte offsets for the fields given.
698  * Interprets the offsets table, which contains struct field offsets.
699  */
700 void
701 xfs_btree_offsets(
702         __int64_t       fields,         /* bitmask of fields */
703         const short     *offsets,       /* table of field offsets */
704         int             nbits,          /* number of bits to inspect */
705         int             *first,         /* output: first byte offset */
706         int             *last)          /* output: last byte offset */
707 {
708         int             i;              /* current bit number */
709         __int64_t       imask;          /* mask for current bit number */
710
711         ASSERT(fields != 0);
712         /*
713          * Find the lowest bit, so the first byte offset.
714          */
715         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
716                 if (imask & fields) {
717                         *first = offsets[i];
718                         break;
719                 }
720         }
721         /*
722          * Find the highest bit, so the last byte offset.
723          */
724         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
725                 if (imask & fields) {
726                         *last = offsets[i + 1] - 1;
727                         break;
728                 }
729         }
730 }
731
732 /*
733  * Get a buffer for the block, return it read in.
734  * Long-form addressing.
735  */
736 int                                     /* error */
737 xfs_btree_read_bufl(
738         xfs_mount_t     *mp,            /* file system mount point */
739         xfs_trans_t     *tp,            /* transaction pointer */
740         xfs_fsblock_t   fsbno,          /* file system block number */
741         uint            lock,           /* lock flags for read_buf */
742         xfs_buf_t       **bpp,          /* buffer for fsbno */
743         int             refval)         /* ref count value for buffer */
744 {
745         xfs_buf_t       *bp;            /* return value */
746         xfs_daddr_t             d;              /* real disk block address */
747         int             error;
748
749         ASSERT(fsbno != NULLFSBLOCK);
750         d = XFS_FSB_TO_DADDR(mp, fsbno);
751         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
752                         mp->m_bsize, lock, &bp))) {
753                 return error;
754         }
755         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
756         if (bp != NULL) {
757                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
758         }
759         *bpp = bp;
760         return 0;
761 }
762
763 /*
764  * Get a buffer for the block, return it read in.
765  * Short-form addressing.
766  */
767 int                                     /* error */
768 xfs_btree_read_bufs(
769         xfs_mount_t     *mp,            /* file system mount point */
770         xfs_trans_t     *tp,            /* transaction pointer */
771         xfs_agnumber_t  agno,           /* allocation group number */
772         xfs_agblock_t   agbno,          /* allocation group block number */
773         uint            lock,           /* lock flags for read_buf */
774         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
775         int             refval)         /* ref count value for buffer */
776 {
777         xfs_buf_t       *bp;            /* return value */
778         xfs_daddr_t     d;              /* real disk block address */
779         int             error;
780
781         ASSERT(agno != NULLAGNUMBER);
782         ASSERT(agbno != NULLAGBLOCK);
783         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
784         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
785                                         mp->m_bsize, lock, &bp))) {
786                 return error;
787         }
788         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
789         if (bp != NULL) {
790                 switch (refval) {
791                 case XFS_ALLOC_BTREE_REF:
792                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
793                         break;
794                 case XFS_INO_BTREE_REF:
795                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
796                         break;
797                 }
798         }
799         *bpp = bp;
800         return 0;
801 }
802
803 /*
804  * Read-ahead the block, don't wait for it, don't return a buffer.
805  * Long-form addressing.
806  */
807 /* ARGSUSED */
808 void
809 xfs_btree_reada_bufl(
810         xfs_mount_t     *mp,            /* file system mount point */
811         xfs_fsblock_t   fsbno,          /* file system block number */
812         xfs_extlen_t    count)          /* count of filesystem blocks */
813 {
814         xfs_daddr_t             d;
815
816         ASSERT(fsbno != NULLFSBLOCK);
817         d = XFS_FSB_TO_DADDR(mp, fsbno);
818         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
819 }
820
821 /*
822  * Read-ahead the block, don't wait for it, don't return a buffer.
823  * Short-form addressing.
824  */
825 /* ARGSUSED */
826 void
827 xfs_btree_reada_bufs(
828         xfs_mount_t     *mp,            /* file system mount point */
829         xfs_agnumber_t  agno,           /* allocation group number */
830         xfs_agblock_t   agbno,          /* allocation group block number */
831         xfs_extlen_t    count)          /* count of filesystem blocks */
832 {
833         xfs_daddr_t             d;
834
835         ASSERT(agno != NULLAGNUMBER);
836         ASSERT(agbno != NULLAGBLOCK);
837         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
838         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
839 }
840
841 STATIC int
842 xfs_btree_readahead_lblock(
843         struct xfs_btree_cur    *cur,
844         int                     lr,
845         struct xfs_btree_block  *block)
846 {
847         int                     rval = 0;
848         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
849         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
850
851         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
852                 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
853                 rval++;
854         }
855
856         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
857                 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
858                 rval++;
859         }
860
861         return rval;
862 }
863
864 STATIC int
865 xfs_btree_readahead_sblock(
866         struct xfs_btree_cur    *cur,
867         int                     lr,
868         struct xfs_btree_block *block)
869 {
870         int                     rval = 0;
871         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
872         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
873
874
875         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
876                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
877                                      left, 1);
878                 rval++;
879         }
880
881         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
882                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
883                                      right, 1);
884                 rval++;
885         }
886
887         return rval;
888 }
889
890 /*
891  * Read-ahead btree blocks, at the given level.
892  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
893  */
894 int
895 xfs_btree_readahead(
896         struct xfs_btree_cur    *cur,           /* btree cursor */
897         int                     lev,            /* level in btree */
898         int                     lr)             /* left/right bits */
899 {
900         struct xfs_btree_block  *block;
901
902         /*
903          * No readahead needed if we are at the root level and the
904          * btree root is stored in the inode.
905          */
906         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
907             (lev == cur->bc_nlevels - 1))
908                 return 0;
909
910         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
911                 return 0;
912
913         cur->bc_ra[lev] |= lr;
914         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
915
916         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
917                 return xfs_btree_readahead_lblock(cur, lr, block);
918         return xfs_btree_readahead_sblock(cur, lr, block);
919 }
920
921 /*
922  * Set the buffer for level "lev" in the cursor to bp, releasing
923  * any previous buffer.
924  */
925 void
926 xfs_btree_setbuf(
927         xfs_btree_cur_t         *cur,   /* btree cursor */
928         int                     lev,    /* level in btree */
929         xfs_buf_t               *bp)    /* new buffer to set */
930 {
931         xfs_btree_block_t       *b;     /* btree block */
932         xfs_buf_t               *obp;   /* old buffer pointer */
933
934         obp = cur->bc_bufs[lev];
935         if (obp)
936                 xfs_trans_brelse(cur->bc_tp, obp);
937         cur->bc_bufs[lev] = bp;
938         cur->bc_ra[lev] = 0;
939         if (!bp)
940                 return;
941         b = XFS_BUF_TO_BLOCK(bp);
942         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
943                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
944                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
945                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
946                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
947         } else {
948                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
949                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
950                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
951                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
952         }
953 }
954
955 STATIC int
956 xfs_btree_ptr_is_null(
957         struct xfs_btree_cur    *cur,
958         union xfs_btree_ptr     *ptr)
959 {
960         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
961                 return be64_to_cpu(ptr->l) == NULLFSBLOCK;
962         else
963                 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
964 }
965
966 /*
967  * Get/set/init sibling pointers
968  */
969 STATIC void
970 xfs_btree_get_sibling(
971         struct xfs_btree_cur    *cur,
972         struct xfs_btree_block  *block,
973         union xfs_btree_ptr     *ptr,
974         int                     lr)
975 {
976         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
977
978         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
979                 if (lr == XFS_BB_RIGHTSIB)
980                         ptr->l = block->bb_u.l.bb_rightsib;
981                 else
982                         ptr->l = block->bb_u.l.bb_leftsib;
983         } else {
984                 if (lr == XFS_BB_RIGHTSIB)
985                         ptr->s = block->bb_u.s.bb_rightsib;
986                 else
987                         ptr->s = block->bb_u.s.bb_leftsib;
988         }
989 }
990
991 STATIC xfs_daddr_t
992 xfs_btree_ptr_to_daddr(
993         struct xfs_btree_cur    *cur,
994         union xfs_btree_ptr     *ptr)
995 {
996         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
997                 ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
998
999                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
1000         } else {
1001                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
1002                 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
1003
1004                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
1005                                         be32_to_cpu(ptr->s));
1006         }
1007 }
1008
1009 STATIC void
1010 xfs_btree_set_refs(
1011         struct xfs_btree_cur    *cur,
1012         struct xfs_buf          *bp)
1013 {
1014         switch (cur->bc_btnum) {
1015         case XFS_BTNUM_BNO:
1016         case XFS_BTNUM_CNT:
1017                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
1018                 break;
1019         case XFS_BTNUM_INO:
1020                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
1021                 break;
1022         case XFS_BTNUM_BMAP:
1023                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1024                 break;
1025         default:
1026                 ASSERT(0);
1027         }
1028 }
1029
1030 /*
1031  * Read in the buffer at the given ptr and return the buffer and
1032  * the block pointer within the buffer.
1033  */
1034 STATIC int
1035 xfs_btree_read_buf_block(
1036         struct xfs_btree_cur    *cur,
1037         union xfs_btree_ptr     *ptr,
1038         int                     level,
1039         int                     flags,
1040         struct xfs_btree_block  **block,
1041         struct xfs_buf          **bpp)
1042 {
1043         struct xfs_mount        *mp = cur->bc_mp;
1044         xfs_daddr_t             d;
1045         int                     error;
1046
1047         /* need to sort out how callers deal with failures first */
1048         ASSERT(!(flags & XFS_BUF_TRYLOCK));
1049
1050         d = xfs_btree_ptr_to_daddr(cur, ptr);
1051         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1052                                    mp->m_bsize, flags, bpp);
1053         if (error)
1054                 return error;
1055
1056         ASSERT(*bpp != NULL);
1057         ASSERT(!XFS_BUF_GETERROR(*bpp));
1058
1059         xfs_btree_set_refs(cur, *bpp);
1060         *block = XFS_BUF_TO_BLOCK(*bpp);
1061
1062         error = xfs_btree_check_block(cur, *block, level, *bpp);
1063         if (error)
1064                 xfs_trans_brelse(cur->bc_tp, *bpp);
1065         return error;
1066 }
1067
1068 /*
1069  * Copy keys from one btree block to another.
1070  */
1071 STATIC void
1072 xfs_btree_copy_keys(
1073         struct xfs_btree_cur    *cur,
1074         union xfs_btree_key     *dst_key,
1075         union xfs_btree_key     *src_key,
1076         int                     numkeys)
1077 {
1078         ASSERT(numkeys >= 0);
1079         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1080 }
1081
1082 /*
1083  * Log key values from the btree block.
1084  */
1085 STATIC void
1086 xfs_btree_log_keys(
1087         struct xfs_btree_cur    *cur,
1088         struct xfs_buf          *bp,
1089         int                     first,
1090         int                     last)
1091 {
1092         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1093         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1094
1095         if (bp) {
1096                 xfs_trans_log_buf(cur->bc_tp, bp,
1097                                   xfs_btree_key_offset(cur, first),
1098                                   xfs_btree_key_offset(cur, last + 1) - 1);
1099         } else {
1100                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1101                                 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1102         }
1103
1104         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1105 }
1106
1107 /*
1108  * Increment cursor by one record at the level.
1109  * For nonzero levels the leaf-ward information is untouched.
1110  */
1111 int                                             /* error */
1112 xfs_btree_increment(
1113         struct xfs_btree_cur    *cur,
1114         int                     level,
1115         int                     *stat)          /* success/failure */
1116 {
1117         struct xfs_btree_block  *block;
1118         union xfs_btree_ptr     ptr;
1119         struct xfs_buf          *bp;
1120         int                     error;          /* error return value */
1121         int                     lev;
1122
1123         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1124         XFS_BTREE_TRACE_ARGI(cur, level);
1125
1126         ASSERT(level < cur->bc_nlevels);
1127
1128         /* Read-ahead to the right at this level. */
1129         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1130
1131         /* Get a pointer to the btree block. */
1132         block = xfs_btree_get_block(cur, level, &bp);
1133
1134 #ifdef DEBUG
1135         error = xfs_btree_check_block(cur, block, level, bp);
1136         if (error)
1137                 goto error0;
1138 #endif
1139
1140         /* We're done if we remain in the block after the increment. */
1141         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1142                 goto out1;
1143
1144         /* Fail if we just went off the right edge of the tree. */
1145         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1146         if (xfs_btree_ptr_is_null(cur, &ptr))
1147                 goto out0;
1148
1149         XFS_BTREE_STATS_INC(cur, increment);
1150
1151         /*
1152          * March up the tree incrementing pointers.
1153          * Stop when we don't go off the right edge of a block.
1154          */
1155         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1156                 block = xfs_btree_get_block(cur, lev, &bp);
1157
1158 #ifdef DEBUG
1159                 error = xfs_btree_check_block(cur, block, lev, bp);
1160                 if (error)
1161                         goto error0;
1162 #endif
1163
1164                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1165                         break;
1166
1167                 /* Read-ahead the right block for the next loop. */
1168                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1169         }
1170
1171         /*
1172          * If we went off the root then we are either seriously
1173          * confused or have the tree root in an inode.
1174          */
1175         if (lev == cur->bc_nlevels) {
1176                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1177                         goto out0;
1178                 ASSERT(0);
1179                 error = EFSCORRUPTED;
1180                 goto error0;
1181         }
1182         ASSERT(lev < cur->bc_nlevels);
1183
1184         /*
1185          * Now walk back down the tree, fixing up the cursor's buffer
1186          * pointers and key numbers.
1187          */
1188         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1189                 union xfs_btree_ptr     *ptrp;
1190
1191                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1192                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1193                                                         0, &block, &bp);
1194                 if (error)
1195                         goto error0;
1196
1197                 xfs_btree_setbuf(cur, lev, bp);
1198                 cur->bc_ptrs[lev] = 1;
1199         }
1200 out1:
1201         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1202         *stat = 1;
1203         return 0;
1204
1205 out0:
1206         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1207         *stat = 0;
1208         return 0;
1209
1210 error0:
1211         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1212         return error;
1213 }
1214
1215 /*
1216  * Decrement cursor by one record at the level.
1217  * For nonzero levels the leaf-ward information is untouched.
1218  */
1219 int                                             /* error */
1220 xfs_btree_decrement(
1221         struct xfs_btree_cur    *cur,
1222         int                     level,
1223         int                     *stat)          /* success/failure */
1224 {
1225         struct xfs_btree_block  *block;
1226         xfs_buf_t               *bp;
1227         int                     error;          /* error return value */
1228         int                     lev;
1229         union xfs_btree_ptr     ptr;
1230
1231         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1232         XFS_BTREE_TRACE_ARGI(cur, level);
1233
1234         ASSERT(level < cur->bc_nlevels);
1235
1236         /* Read-ahead to the left at this level. */
1237         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1238
1239         /* We're done if we remain in the block after the decrement. */
1240         if (--cur->bc_ptrs[level] > 0)
1241                 goto out1;
1242
1243         /* Get a pointer to the btree block. */
1244         block = xfs_btree_get_block(cur, level, &bp);
1245
1246 #ifdef DEBUG
1247         error = xfs_btree_check_block(cur, block, level, bp);
1248         if (error)
1249                 goto error0;
1250 #endif
1251
1252         /* Fail if we just went off the left edge of the tree. */
1253         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1254         if (xfs_btree_ptr_is_null(cur, &ptr))
1255                 goto out0;
1256
1257         XFS_BTREE_STATS_INC(cur, decrement);
1258
1259         /*
1260          * March up the tree decrementing pointers.
1261          * Stop when we don't go off the left edge of a block.
1262          */
1263         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1264                 if (--cur->bc_ptrs[lev] > 0)
1265                         break;
1266                 /* Read-ahead the left block for the next loop. */
1267                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1268         }
1269
1270         /*
1271          * If we went off the root then we are seriously confused.
1272          * or the root of the tree is in an inode.
1273          */
1274         if (lev == cur->bc_nlevels) {
1275                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1276                         goto out0;
1277                 ASSERT(0);
1278                 error = EFSCORRUPTED;
1279                 goto error0;
1280         }
1281         ASSERT(lev < cur->bc_nlevels);
1282
1283         /*
1284          * Now walk back down the tree, fixing up the cursor's buffer
1285          * pointers and key numbers.
1286          */
1287         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1288                 union xfs_btree_ptr     *ptrp;
1289
1290                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1291                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1292                                                         0, &block, &bp);
1293                 if (error)
1294                         goto error0;
1295                 xfs_btree_setbuf(cur, lev, bp);
1296                 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1297         }
1298 out1:
1299         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1300         *stat = 1;
1301         return 0;
1302
1303 out0:
1304         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1305         *stat = 0;
1306         return 0;
1307
1308 error0:
1309         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1310         return error;
1311 }
1312
1313
1314 STATIC int
1315 xfs_btree_lookup_get_block(
1316         struct xfs_btree_cur    *cur,   /* btree cursor */
1317         int                     level,  /* level in the btree */
1318         union xfs_btree_ptr     *pp,    /* ptr to btree block */
1319         struct xfs_btree_block  **blkp) /* return btree block */
1320 {
1321         struct xfs_buf          *bp;    /* buffer pointer for btree block */
1322         int                     error = 0;
1323
1324         /* special case the root block if in an inode */
1325         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1326             (level == cur->bc_nlevels - 1)) {
1327                 *blkp = xfs_btree_get_iroot(cur);
1328                 return 0;
1329         }
1330
1331         /*
1332          * If the old buffer at this level for the disk address we are
1333          * looking for re-use it.
1334          *
1335          * Otherwise throw it away and get a new one.
1336          */
1337         bp = cur->bc_bufs[level];
1338         if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1339                 *blkp = XFS_BUF_TO_BLOCK(bp);
1340                 return 0;
1341         }
1342
1343         error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1344         if (error)
1345                 return error;
1346
1347         xfs_btree_setbuf(cur, level, bp);
1348         return 0;
1349 }
1350
1351 /*
1352  * Get current search key.  For level 0 we don't actually have a key
1353  * structure so we make one up from the record.  For all other levels
1354  * we just return the right key.
1355  */
1356 STATIC union xfs_btree_key *
1357 xfs_lookup_get_search_key(
1358         struct xfs_btree_cur    *cur,
1359         int                     level,
1360         int                     keyno,
1361         struct xfs_btree_block  *block,
1362         union xfs_btree_key     *kp)
1363 {
1364         if (level == 0) {
1365                 cur->bc_ops->init_key_from_rec(kp,
1366                                 xfs_btree_rec_addr(cur, keyno, block));
1367                 return kp;
1368         }
1369
1370         return xfs_btree_key_addr(cur, keyno, block);
1371 }
1372
1373 /*
1374  * Lookup the record.  The cursor is made to point to it, based on dir.
1375  * Return 0 if can't find any such record, 1 for success.
1376  */
1377 int                                     /* error */
1378 xfs_btree_lookup(
1379         struct xfs_btree_cur    *cur,   /* btree cursor */
1380         xfs_lookup_t            dir,    /* <=, ==, or >= */
1381         int                     *stat)  /* success/failure */
1382 {
1383         struct xfs_btree_block  *block; /* current btree block */
1384         __int64_t               diff;   /* difference for the current key */
1385         int                     error;  /* error return value */
1386         int                     keyno;  /* current key number */
1387         int                     level;  /* level in the btree */
1388         union xfs_btree_ptr     *pp;    /* ptr to btree block */
1389         union xfs_btree_ptr     ptr;    /* ptr to btree block */
1390
1391         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1392         XFS_BTREE_TRACE_ARGI(cur, dir);
1393
1394         XFS_BTREE_STATS_INC(cur, lookup);
1395
1396         block = NULL;
1397         keyno = 0;
1398
1399         /* initialise start pointer from cursor */
1400         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1401         pp = &ptr;
1402
1403         /*
1404          * Iterate over each level in the btree, starting at the root.
1405          * For each level above the leaves, find the key we need, based
1406          * on the lookup record, then follow the corresponding block
1407          * pointer down to the next level.
1408          */
1409         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1410                 /* Get the block we need to do the lookup on. */
1411                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1412                 if (error)
1413                         goto error0;
1414
1415                 if (diff == 0) {
1416                         /*
1417                          * If we already had a key match at a higher level, we
1418                          * know we need to use the first entry in this block.
1419                          */
1420                         keyno = 1;
1421                 } else {
1422                         /* Otherwise search this block. Do a binary search. */
1423
1424                         int     high;   /* high entry number */
1425                         int     low;    /* low entry number */
1426
1427                         /* Set low and high entry numbers, 1-based. */
1428                         low = 1;
1429                         high = xfs_btree_get_numrecs(block);
1430                         if (!high) {
1431                                 /* Block is empty, must be an empty leaf. */
1432                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1433
1434                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1435                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1436                                 *stat = 0;
1437                                 return 0;
1438                         }
1439
1440                         /* Binary search the block. */
1441                         while (low <= high) {
1442                                 union xfs_btree_key     key;
1443                                 union xfs_btree_key     *kp;
1444
1445                                 XFS_BTREE_STATS_INC(cur, compare);
1446
1447                                 /* keyno is average of low and high. */
1448                                 keyno = (low + high) >> 1;
1449
1450                                 /* Get current search key */
1451                                 kp = xfs_lookup_get_search_key(cur, level,
1452                                                 keyno, block, &key);
1453
1454                                 /*
1455                                  * Compute difference to get next direction:
1456                                  *  - less than, move right
1457                                  *  - greater than, move left
1458                                  *  - equal, we're done
1459                                  */
1460                                 diff = cur->bc_ops->key_diff(cur, kp);
1461                                 if (diff < 0)
1462                                         low = keyno + 1;
1463                                 else if (diff > 0)
1464                                         high = keyno - 1;
1465                                 else
1466                                         break;
1467                         }
1468                 }
1469
1470                 /*
1471                  * If there are more levels, set up for the next level
1472                  * by getting the block number and filling in the cursor.
1473                  */
1474                 if (level > 0) {
1475                         /*
1476                          * If we moved left, need the previous key number,
1477                          * unless there isn't one.
1478                          */
1479                         if (diff > 0 && --keyno < 1)
1480                                 keyno = 1;
1481                         pp = xfs_btree_ptr_addr(cur, keyno, block);
1482
1483 #ifdef DEBUG
1484                         error = xfs_btree_check_ptr(cur, pp, 0, level);
1485                         if (error)
1486                                 goto error0;
1487 #endif
1488                         cur->bc_ptrs[level] = keyno;
1489                 }
1490         }
1491
1492         /* Done with the search. See if we need to adjust the results. */
1493         if (dir != XFS_LOOKUP_LE && diff < 0) {
1494                 keyno++;
1495                 /*
1496                  * If ge search and we went off the end of the block, but it's
1497                  * not the last block, we're in the wrong block.
1498                  */
1499                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1500                 if (dir == XFS_LOOKUP_GE &&
1501                     keyno > xfs_btree_get_numrecs(block) &&
1502                     !xfs_btree_ptr_is_null(cur, &ptr)) {
1503                         int     i;
1504
1505                         cur->bc_ptrs[0] = keyno;
1506                         error = xfs_btree_increment(cur, 0, &i);
1507                         if (error)
1508                                 goto error0;
1509                         XFS_WANT_CORRUPTED_RETURN(i == 1);
1510                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1511                         *stat = 1;
1512                         return 0;
1513                 }
1514         } else if (dir == XFS_LOOKUP_LE && diff > 0)
1515                 keyno--;
1516         cur->bc_ptrs[0] = keyno;
1517
1518         /* Return if we succeeded or not. */
1519         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1520                 *stat = 0;
1521         else if (dir != XFS_LOOKUP_EQ || diff == 0)
1522                 *stat = 1;
1523         else
1524                 *stat = 0;
1525         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1526         return 0;
1527
1528 error0:
1529         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1530         return error;
1531 }
1532
1533 /*
1534  * Update keys at all levels from here to the root along the cursor's path.
1535  */
1536 int
1537 xfs_btree_updkey(
1538         struct xfs_btree_cur    *cur,
1539         union xfs_btree_key     *keyp,
1540         int                     level)
1541 {
1542         struct xfs_btree_block  *block;
1543         struct xfs_buf          *bp;
1544         union xfs_btree_key     *kp;
1545         int                     ptr;
1546
1547         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1548         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1549
1550         ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1551
1552         /*
1553          * Go up the tree from this level toward the root.
1554          * At each level, update the key value to the value input.
1555          * Stop when we reach a level where the cursor isn't pointing
1556          * at the first entry in the block.
1557          */
1558         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1559 #ifdef DEBUG
1560                 int             error;
1561 #endif
1562                 block = xfs_btree_get_block(cur, level, &bp);
1563 #ifdef DEBUG
1564                 error = xfs_btree_check_block(cur, block, level, bp);
1565                 if (error) {
1566                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1567                         return error;
1568                 }
1569 #endif
1570                 ptr = cur->bc_ptrs[level];
1571                 kp = xfs_btree_key_addr(cur, ptr, block);
1572                 xfs_btree_copy_keys(cur, kp, keyp, 1);
1573                 xfs_btree_log_keys(cur, bp, ptr, ptr);
1574         }
1575
1576         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1577         return 0;
1578 }