[XFS] implement generic xfs_btree_update
[safe/jmp/linux-2.6] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_inode_item.h"
38 #include "xfs_btree.h"
39 #include "xfs_btree_trace.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_error.h"
42
43 /*
44  * Cursor allocation zone.
45  */
46 kmem_zone_t     *xfs_btree_cur_zone;
47
48 /*
49  * Btree magic numbers.
50  */
51 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
52         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
53 };
54
55 /*
56  * External routines.
57  */
58
59 #ifdef DEBUG
60 /*
61  * Debug routine: check that keys are in the right order.
62  */
63 void
64 xfs_btree_check_key(
65         xfs_btnum_t     btnum,          /* btree identifier */
66         void            *ak1,           /* pointer to left (lower) key */
67         void            *ak2)           /* pointer to right (higher) key */
68 {
69         switch (btnum) {
70         case XFS_BTNUM_BNO: {
71                 xfs_alloc_key_t *k1;
72                 xfs_alloc_key_t *k2;
73
74                 k1 = ak1;
75                 k2 = ak2;
76                 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
77                 break;
78             }
79         case XFS_BTNUM_CNT: {
80                 xfs_alloc_key_t *k1;
81                 xfs_alloc_key_t *k2;
82
83                 k1 = ak1;
84                 k2 = ak2;
85                 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
86                        (k1->ar_blockcount == k2->ar_blockcount &&
87                         be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
88                 break;
89             }
90         case XFS_BTNUM_BMAP: {
91                 xfs_bmbt_key_t  *k1;
92                 xfs_bmbt_key_t  *k2;
93
94                 k1 = ak1;
95                 k2 = ak2;
96                 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
97                 break;
98             }
99         case XFS_BTNUM_INO: {
100                 xfs_inobt_key_t *k1;
101                 xfs_inobt_key_t *k2;
102
103                 k1 = ak1;
104                 k2 = ak2;
105                 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
106                 break;
107             }
108         default:
109                 ASSERT(0);
110         }
111 }
112
113 /*
114  * Debug routine: check that records are in the right order.
115  */
116 void
117 xfs_btree_check_rec(
118         xfs_btnum_t     btnum,          /* btree identifier */
119         void            *ar1,           /* pointer to left (lower) record */
120         void            *ar2)           /* pointer to right (higher) record */
121 {
122         switch (btnum) {
123         case XFS_BTNUM_BNO: {
124                 xfs_alloc_rec_t *r1;
125                 xfs_alloc_rec_t *r2;
126
127                 r1 = ar1;
128                 r2 = ar2;
129                 ASSERT(be32_to_cpu(r1->ar_startblock) +
130                        be32_to_cpu(r1->ar_blockcount) <=
131                        be32_to_cpu(r2->ar_startblock));
132                 break;
133             }
134         case XFS_BTNUM_CNT: {
135                 xfs_alloc_rec_t *r1;
136                 xfs_alloc_rec_t *r2;
137
138                 r1 = ar1;
139                 r2 = ar2;
140                 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
141                        (r1->ar_blockcount == r2->ar_blockcount &&
142                         be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
143                 break;
144             }
145         case XFS_BTNUM_BMAP: {
146                 xfs_bmbt_rec_t  *r1;
147                 xfs_bmbt_rec_t  *r2;
148
149                 r1 = ar1;
150                 r2 = ar2;
151                 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
152                        xfs_bmbt_disk_get_blockcount(r1) <=
153                        xfs_bmbt_disk_get_startoff(r2));
154                 break;
155             }
156         case XFS_BTNUM_INO: {
157                 xfs_inobt_rec_t *r1;
158                 xfs_inobt_rec_t *r2;
159
160                 r1 = ar1;
161                 r2 = ar2;
162                 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
163                        be32_to_cpu(r2->ir_startino));
164                 break;
165             }
166         default:
167                 ASSERT(0);
168         }
169 }
170 #endif  /* DEBUG */
171
172 int                                     /* error (0 or EFSCORRUPTED) */
173 xfs_btree_check_lblock(
174         struct xfs_btree_cur    *cur,   /* btree cursor */
175         struct xfs_btree_lblock *block, /* btree long form block pointer */
176         int                     level,  /* level of the btree block */
177         struct xfs_buf          *bp)    /* buffer for block, if any */
178 {
179         int                     lblock_ok; /* block passes checks */
180         struct xfs_mount        *mp;    /* file system mount point */
181
182         mp = cur->bc_mp;
183         lblock_ok =
184                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
185                 be16_to_cpu(block->bb_level) == level &&
186                 be16_to_cpu(block->bb_numrecs) <=
187                         cur->bc_ops->get_maxrecs(cur, level) &&
188                 block->bb_leftsib &&
189                 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
190                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
191                 block->bb_rightsib &&
192                 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
193                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
194         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
195                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
196                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
197                 if (bp)
198                         xfs_buftrace("LBTREE ERROR", bp);
199                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
200                                  mp);
201                 return XFS_ERROR(EFSCORRUPTED);
202         }
203         return 0;
204 }
205
206 int                                     /* error (0 or EFSCORRUPTED) */
207 xfs_btree_check_sblock(
208         struct xfs_btree_cur    *cur,   /* btree cursor */
209         struct xfs_btree_sblock *block, /* btree short form block pointer */
210         int                     level,  /* level of the btree block */
211         struct xfs_buf          *bp)    /* buffer containing block */
212 {
213         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
214         struct xfs_agf          *agf;   /* ag. freespace structure */
215         xfs_agblock_t           agflen; /* native ag. freespace length */
216         int                     sblock_ok; /* block passes checks */
217
218         agbp = cur->bc_private.a.agbp;
219         agf = XFS_BUF_TO_AGF(agbp);
220         agflen = be32_to_cpu(agf->agf_length);
221         sblock_ok =
222                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
223                 be16_to_cpu(block->bb_level) == level &&
224                 be16_to_cpu(block->bb_numrecs) <=
225                         cur->bc_ops->get_maxrecs(cur, level) &&
226                 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
227                  be32_to_cpu(block->bb_leftsib) < agflen) &&
228                 block->bb_leftsib &&
229                 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
230                  be32_to_cpu(block->bb_rightsib) < agflen) &&
231                 block->bb_rightsib;
232         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
233                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
234                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
235                 if (bp)
236                         xfs_buftrace("SBTREE ERROR", bp);
237                 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
238                                  cur->bc_mp);
239                 return XFS_ERROR(EFSCORRUPTED);
240         }
241         return 0;
242 }
243
244 /*
245  * Debug routine: check that block header is ok.
246  */
247 int
248 xfs_btree_check_block(
249         struct xfs_btree_cur    *cur,   /* btree cursor */
250         struct xfs_btree_block  *block, /* generic btree block pointer */
251         int                     level,  /* level of the btree block */
252         struct xfs_buf          *bp)    /* buffer containing block, if any */
253 {
254         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
255                 return xfs_btree_check_lblock(cur,
256                                 (struct xfs_btree_lblock *)block, level, bp);
257         } else {
258                 return xfs_btree_check_sblock(cur,
259                                 (struct xfs_btree_sblock *)block, level, bp);
260         }
261 }
262
263 /*
264  * Check that (long) pointer is ok.
265  */
266 int                                     /* error (0 or EFSCORRUPTED) */
267 xfs_btree_check_lptr(
268         struct xfs_btree_cur    *cur,   /* btree cursor */
269         xfs_dfsbno_t            bno,    /* btree block disk address */
270         int                     level)  /* btree block level */
271 {
272         XFS_WANT_CORRUPTED_RETURN(
273                 level > 0 &&
274                 bno != NULLDFSBNO &&
275                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
276         return 0;
277 }
278
279 /*
280  * Check that (short) pointer is ok.
281  */
282 int                                     /* error (0 or EFSCORRUPTED) */
283 xfs_btree_check_sptr(
284         struct xfs_btree_cur    *cur,   /* btree cursor */
285         xfs_agblock_t           bno,    /* btree block disk address */
286         int                     level)  /* btree block level */
287 {
288         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
289
290         XFS_WANT_CORRUPTED_RETURN(
291                 level > 0 &&
292                 bno != NULLAGBLOCK &&
293                 bno != 0 &&
294                 bno < agblocks);
295         return 0;
296 }
297
298 /*
299  * Check that block ptr is ok.
300  */
301 int                                     /* error (0 or EFSCORRUPTED) */
302 xfs_btree_check_ptr(
303         struct xfs_btree_cur    *cur,   /* btree cursor */
304         union xfs_btree_ptr     *ptr,   /* btree block disk address */
305         int                     index,  /* offset from ptr to check */
306         int                     level)  /* btree block level */
307 {
308         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
309                 return xfs_btree_check_lptr(cur,
310                                 be64_to_cpu((&ptr->l)[index]), level);
311         } else {
312                 return xfs_btree_check_sptr(cur,
313                                 be32_to_cpu((&ptr->s)[index]), level);
314         }
315 }
316
317 /*
318  * Delete the btree cursor.
319  */
320 void
321 xfs_btree_del_cursor(
322         xfs_btree_cur_t *cur,           /* btree cursor */
323         int             error)          /* del because of error */
324 {
325         int             i;              /* btree level */
326
327         /*
328          * Clear the buffer pointers, and release the buffers.
329          * If we're doing this in the face of an error, we
330          * need to make sure to inspect all of the entries
331          * in the bc_bufs array for buffers to be unlocked.
332          * This is because some of the btree code works from
333          * level n down to 0, and if we get an error along
334          * the way we won't have initialized all the entries
335          * down to 0.
336          */
337         for (i = 0; i < cur->bc_nlevels; i++) {
338                 if (cur->bc_bufs[i])
339                         xfs_btree_setbuf(cur, i, NULL);
340                 else if (!error)
341                         break;
342         }
343         /*
344          * Can't free a bmap cursor without having dealt with the
345          * allocated indirect blocks' accounting.
346          */
347         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
348                cur->bc_private.b.allocated == 0);
349         /*
350          * Free the cursor.
351          */
352         kmem_zone_free(xfs_btree_cur_zone, cur);
353 }
354
355 /*
356  * Duplicate the btree cursor.
357  * Allocate a new one, copy the record, re-get the buffers.
358  */
359 int                                     /* error */
360 xfs_btree_dup_cursor(
361         xfs_btree_cur_t *cur,           /* input cursor */
362         xfs_btree_cur_t **ncur)         /* output cursor */
363 {
364         xfs_buf_t       *bp;            /* btree block's buffer pointer */
365         int             error;          /* error return value */
366         int             i;              /* level number of btree block */
367         xfs_mount_t     *mp;            /* mount structure for filesystem */
368         xfs_btree_cur_t *new;           /* new cursor value */
369         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
370
371         tp = cur->bc_tp;
372         mp = cur->bc_mp;
373
374         /*
375          * Allocate a new cursor like the old one.
376          */
377         new = cur->bc_ops->dup_cursor(cur);
378
379         /*
380          * Copy the record currently in the cursor.
381          */
382         new->bc_rec = cur->bc_rec;
383
384         /*
385          * For each level current, re-get the buffer and copy the ptr value.
386          */
387         for (i = 0; i < new->bc_nlevels; i++) {
388                 new->bc_ptrs[i] = cur->bc_ptrs[i];
389                 new->bc_ra[i] = cur->bc_ra[i];
390                 if ((bp = cur->bc_bufs[i])) {
391                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
392                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
393                                 xfs_btree_del_cursor(new, error);
394                                 *ncur = NULL;
395                                 return error;
396                         }
397                         new->bc_bufs[i] = bp;
398                         ASSERT(bp);
399                         ASSERT(!XFS_BUF_GETERROR(bp));
400                 } else
401                         new->bc_bufs[i] = NULL;
402         }
403         *ncur = new;
404         return 0;
405 }
406
407 /*
408  * XFS btree block layout and addressing:
409  *
410  * There are two types of blocks in the btree: leaf and non-leaf blocks.
411  *
412  * The leaf record start with a header then followed by records containing
413  * the values.  A non-leaf block also starts with the same header, and
414  * then first contains lookup keys followed by an equal number of pointers
415  * to the btree blocks at the previous level.
416  *
417  *              +--------+-------+-------+-------+-------+-------+-------+
418  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
419  *              +--------+-------+-------+-------+-------+-------+-------+
420  *
421  *              +--------+-------+-------+-------+-------+-------+-------+
422  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
423  *              +--------+-------+-------+-------+-------+-------+-------+
424  *
425  * The header is called struct xfs_btree_block for reasons better left unknown
426  * and comes in different versions for short (32bit) and long (64bit) block
427  * pointers.  The record and key structures are defined by the btree instances
428  * and opaque to the btree core.  The block pointers are simple disk endian
429  * integers, available in a short (32bit) and long (64bit) variant.
430  *
431  * The helpers below calculate the offset of a given record, key or pointer
432  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
433  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
434  * inside the btree block is done using indices starting at one, not zero!
435  */
436
437 /*
438  * Return size of the btree block header for this btree instance.
439  */
440 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
441 {
442         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
443                 sizeof(struct xfs_btree_lblock) :
444                 sizeof(struct xfs_btree_sblock);
445 }
446
447 /*
448  * Return size of btree block pointers for this btree instance.
449  */
450 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
451 {
452         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
453                 sizeof(__be64) : sizeof(__be32);
454 }
455
456 /*
457  * Calculate offset of the n-th record in a btree block.
458  */
459 STATIC size_t
460 xfs_btree_rec_offset(
461         struct xfs_btree_cur    *cur,
462         int                     n)
463 {
464         return xfs_btree_block_len(cur) +
465                 (n - 1) * cur->bc_ops->rec_len;
466 }
467
468 /*
469  * Calculate offset of the n-th key in a btree block.
470  */
471 STATIC size_t
472 xfs_btree_key_offset(
473         struct xfs_btree_cur    *cur,
474         int                     n)
475 {
476         return xfs_btree_block_len(cur) +
477                 (n - 1) * cur->bc_ops->key_len;
478 }
479
480 /*
481  * Calculate offset of the n-th block pointer in a btree block.
482  */
483 STATIC size_t
484 xfs_btree_ptr_offset(
485         struct xfs_btree_cur    *cur,
486         int                     n,
487         int                     level)
488 {
489         return xfs_btree_block_len(cur) +
490                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
491                 (n - 1) * xfs_btree_ptr_len(cur);
492 }
493
494 /*
495  * Return a pointer to the n-th record in the btree block.
496  */
497 STATIC union xfs_btree_rec *
498 xfs_btree_rec_addr(
499         struct xfs_btree_cur    *cur,
500         int                     n,
501         struct xfs_btree_block  *block)
502 {
503         return (union xfs_btree_rec *)
504                 ((char *)block + xfs_btree_rec_offset(cur, n));
505 }
506
507 /*
508  * Return a pointer to the n-th key in the btree block.
509  */
510 STATIC union xfs_btree_key *
511 xfs_btree_key_addr(
512         struct xfs_btree_cur    *cur,
513         int                     n,
514         struct xfs_btree_block  *block)
515 {
516         return (union xfs_btree_key *)
517                 ((char *)block + xfs_btree_key_offset(cur, n));
518 }
519
520 /*
521  * Return a pointer to the n-th block pointer in the btree block.
522  */
523 STATIC union xfs_btree_ptr *
524 xfs_btree_ptr_addr(
525         struct xfs_btree_cur    *cur,
526         int                     n,
527         struct xfs_btree_block  *block)
528 {
529         int                     level = xfs_btree_get_level(block);
530
531         ASSERT(block->bb_level != 0);
532
533         return (union xfs_btree_ptr *)
534                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
535 }
536
537 /*
538  * Get a the root block which is stored in the inode.
539  *
540  * For now this btree implementation assumes the btree root is always
541  * stored in the if_broot field of an inode fork.
542  */
543 STATIC struct xfs_btree_block *
544 xfs_btree_get_iroot(
545        struct xfs_btree_cur    *cur)
546 {
547        struct xfs_ifork        *ifp;
548
549        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
550        return (struct xfs_btree_block *)ifp->if_broot;
551 }
552
553 /*
554  * Retrieve the block pointer from the cursor at the given level.
555  * This may be an inode btree root or from a buffer.
556  */
557 STATIC struct xfs_btree_block *         /* generic btree block pointer */
558 xfs_btree_get_block(
559         struct xfs_btree_cur    *cur,   /* btree cursor */
560         int                     level,  /* level in btree */
561         struct xfs_buf          **bpp)  /* buffer containing the block */
562 {
563         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
564             (level == cur->bc_nlevels - 1)) {
565                 *bpp = NULL;
566                 return xfs_btree_get_iroot(cur);
567         }
568
569         *bpp = cur->bc_bufs[level];
570         return XFS_BUF_TO_BLOCK(*bpp);
571 }
572
573 /*
574  * Get a buffer for the block, return it with no data read.
575  * Long-form addressing.
576  */
577 xfs_buf_t *                             /* buffer for fsbno */
578 xfs_btree_get_bufl(
579         xfs_mount_t     *mp,            /* file system mount point */
580         xfs_trans_t     *tp,            /* transaction pointer */
581         xfs_fsblock_t   fsbno,          /* file system block number */
582         uint            lock)           /* lock flags for get_buf */
583 {
584         xfs_buf_t       *bp;            /* buffer pointer (return value) */
585         xfs_daddr_t             d;              /* real disk block address */
586
587         ASSERT(fsbno != NULLFSBLOCK);
588         d = XFS_FSB_TO_DADDR(mp, fsbno);
589         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
590         ASSERT(bp);
591         ASSERT(!XFS_BUF_GETERROR(bp));
592         return bp;
593 }
594
595 /*
596  * Get a buffer for the block, return it with no data read.
597  * Short-form addressing.
598  */
599 xfs_buf_t *                             /* buffer for agno/agbno */
600 xfs_btree_get_bufs(
601         xfs_mount_t     *mp,            /* file system mount point */
602         xfs_trans_t     *tp,            /* transaction pointer */
603         xfs_agnumber_t  agno,           /* allocation group number */
604         xfs_agblock_t   agbno,          /* allocation group block number */
605         uint            lock)           /* lock flags for get_buf */
606 {
607         xfs_buf_t       *bp;            /* buffer pointer (return value) */
608         xfs_daddr_t             d;              /* real disk block address */
609
610         ASSERT(agno != NULLAGNUMBER);
611         ASSERT(agbno != NULLAGBLOCK);
612         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
613         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
614         ASSERT(bp);
615         ASSERT(!XFS_BUF_GETERROR(bp));
616         return bp;
617 }
618
619 /*
620  * Check for the cursor referring to the last block at the given level.
621  */
622 int                                     /* 1=is last block, 0=not last block */
623 xfs_btree_islastblock(
624         xfs_btree_cur_t         *cur,   /* btree cursor */
625         int                     level)  /* level to check */
626 {
627         xfs_btree_block_t       *block; /* generic btree block pointer */
628         xfs_buf_t               *bp;    /* buffer containing block */
629
630         block = xfs_btree_get_block(cur, level, &bp);
631         xfs_btree_check_block(cur, block, level, bp);
632         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
633                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
634         else
635                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
636 }
637
638 /*
639  * Change the cursor to point to the first record at the given level.
640  * Other levels are unaffected.
641  */
642 int                                     /* success=1, failure=0 */
643 xfs_btree_firstrec(
644         xfs_btree_cur_t         *cur,   /* btree cursor */
645         int                     level)  /* level to change */
646 {
647         xfs_btree_block_t       *block; /* generic btree block pointer */
648         xfs_buf_t               *bp;    /* buffer containing block */
649
650         /*
651          * Get the block pointer for this level.
652          */
653         block = xfs_btree_get_block(cur, level, &bp);
654         xfs_btree_check_block(cur, block, level, bp);
655         /*
656          * It's empty, there is no such record.
657          */
658         if (!block->bb_numrecs)
659                 return 0;
660         /*
661          * Set the ptr value to 1, that's the first record/key.
662          */
663         cur->bc_ptrs[level] = 1;
664         return 1;
665 }
666
667 /*
668  * Change the cursor to point to the last record in the current block
669  * at the given level.  Other levels are unaffected.
670  */
671 int                                     /* success=1, failure=0 */
672 xfs_btree_lastrec(
673         xfs_btree_cur_t         *cur,   /* btree cursor */
674         int                     level)  /* level to change */
675 {
676         xfs_btree_block_t       *block; /* generic btree block pointer */
677         xfs_buf_t               *bp;    /* buffer containing block */
678
679         /*
680          * Get the block pointer for this level.
681          */
682         block = xfs_btree_get_block(cur, level, &bp);
683         xfs_btree_check_block(cur, block, level, bp);
684         /*
685          * It's empty, there is no such record.
686          */
687         if (!block->bb_numrecs)
688                 return 0;
689         /*
690          * Set the ptr value to numrecs, that's the last record/key.
691          */
692         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
693         return 1;
694 }
695
696 /*
697  * Compute first and last byte offsets for the fields given.
698  * Interprets the offsets table, which contains struct field offsets.
699  */
700 void
701 xfs_btree_offsets(
702         __int64_t       fields,         /* bitmask of fields */
703         const short     *offsets,       /* table of field offsets */
704         int             nbits,          /* number of bits to inspect */
705         int             *first,         /* output: first byte offset */
706         int             *last)          /* output: last byte offset */
707 {
708         int             i;              /* current bit number */
709         __int64_t       imask;          /* mask for current bit number */
710
711         ASSERT(fields != 0);
712         /*
713          * Find the lowest bit, so the first byte offset.
714          */
715         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
716                 if (imask & fields) {
717                         *first = offsets[i];
718                         break;
719                 }
720         }
721         /*
722          * Find the highest bit, so the last byte offset.
723          */
724         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
725                 if (imask & fields) {
726                         *last = offsets[i + 1] - 1;
727                         break;
728                 }
729         }
730 }
731
732 /*
733  * Get a buffer for the block, return it read in.
734  * Long-form addressing.
735  */
736 int                                     /* error */
737 xfs_btree_read_bufl(
738         xfs_mount_t     *mp,            /* file system mount point */
739         xfs_trans_t     *tp,            /* transaction pointer */
740         xfs_fsblock_t   fsbno,          /* file system block number */
741         uint            lock,           /* lock flags for read_buf */
742         xfs_buf_t       **bpp,          /* buffer for fsbno */
743         int             refval)         /* ref count value for buffer */
744 {
745         xfs_buf_t       *bp;            /* return value */
746         xfs_daddr_t             d;              /* real disk block address */
747         int             error;
748
749         ASSERT(fsbno != NULLFSBLOCK);
750         d = XFS_FSB_TO_DADDR(mp, fsbno);
751         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
752                         mp->m_bsize, lock, &bp))) {
753                 return error;
754         }
755         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
756         if (bp != NULL) {
757                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
758         }
759         *bpp = bp;
760         return 0;
761 }
762
763 /*
764  * Get a buffer for the block, return it read in.
765  * Short-form addressing.
766  */
767 int                                     /* error */
768 xfs_btree_read_bufs(
769         xfs_mount_t     *mp,            /* file system mount point */
770         xfs_trans_t     *tp,            /* transaction pointer */
771         xfs_agnumber_t  agno,           /* allocation group number */
772         xfs_agblock_t   agbno,          /* allocation group block number */
773         uint            lock,           /* lock flags for read_buf */
774         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
775         int             refval)         /* ref count value for buffer */
776 {
777         xfs_buf_t       *bp;            /* return value */
778         xfs_daddr_t     d;              /* real disk block address */
779         int             error;
780
781         ASSERT(agno != NULLAGNUMBER);
782         ASSERT(agbno != NULLAGBLOCK);
783         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
784         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
785                                         mp->m_bsize, lock, &bp))) {
786                 return error;
787         }
788         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
789         if (bp != NULL) {
790                 switch (refval) {
791                 case XFS_ALLOC_BTREE_REF:
792                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
793                         break;
794                 case XFS_INO_BTREE_REF:
795                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
796                         break;
797                 }
798         }
799         *bpp = bp;
800         return 0;
801 }
802
803 /*
804  * Read-ahead the block, don't wait for it, don't return a buffer.
805  * Long-form addressing.
806  */
807 /* ARGSUSED */
808 void
809 xfs_btree_reada_bufl(
810         xfs_mount_t     *mp,            /* file system mount point */
811         xfs_fsblock_t   fsbno,          /* file system block number */
812         xfs_extlen_t    count)          /* count of filesystem blocks */
813 {
814         xfs_daddr_t             d;
815
816         ASSERT(fsbno != NULLFSBLOCK);
817         d = XFS_FSB_TO_DADDR(mp, fsbno);
818         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
819 }
820
821 /*
822  * Read-ahead the block, don't wait for it, don't return a buffer.
823  * Short-form addressing.
824  */
825 /* ARGSUSED */
826 void
827 xfs_btree_reada_bufs(
828         xfs_mount_t     *mp,            /* file system mount point */
829         xfs_agnumber_t  agno,           /* allocation group number */
830         xfs_agblock_t   agbno,          /* allocation group block number */
831         xfs_extlen_t    count)          /* count of filesystem blocks */
832 {
833         xfs_daddr_t             d;
834
835         ASSERT(agno != NULLAGNUMBER);
836         ASSERT(agbno != NULLAGBLOCK);
837         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
838         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
839 }
840
841 STATIC int
842 xfs_btree_readahead_lblock(
843         struct xfs_btree_cur    *cur,
844         int                     lr,
845         struct xfs_btree_block  *block)
846 {
847         int                     rval = 0;
848         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
849         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
850
851         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
852                 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
853                 rval++;
854         }
855
856         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
857                 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
858                 rval++;
859         }
860
861         return rval;
862 }
863
864 STATIC int
865 xfs_btree_readahead_sblock(
866         struct xfs_btree_cur    *cur,
867         int                     lr,
868         struct xfs_btree_block *block)
869 {
870         int                     rval = 0;
871         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
872         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
873
874
875         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
876                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
877                                      left, 1);
878                 rval++;
879         }
880
881         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
882                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
883                                      right, 1);
884                 rval++;
885         }
886
887         return rval;
888 }
889
890 /*
891  * Read-ahead btree blocks, at the given level.
892  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
893  */
894 int
895 xfs_btree_readahead(
896         struct xfs_btree_cur    *cur,           /* btree cursor */
897         int                     lev,            /* level in btree */
898         int                     lr)             /* left/right bits */
899 {
900         struct xfs_btree_block  *block;
901
902         /*
903          * No readahead needed if we are at the root level and the
904          * btree root is stored in the inode.
905          */
906         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
907             (lev == cur->bc_nlevels - 1))
908                 return 0;
909
910         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
911                 return 0;
912
913         cur->bc_ra[lev] |= lr;
914         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
915
916         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
917                 return xfs_btree_readahead_lblock(cur, lr, block);
918         return xfs_btree_readahead_sblock(cur, lr, block);
919 }
920
921 /*
922  * Set the buffer for level "lev" in the cursor to bp, releasing
923  * any previous buffer.
924  */
925 void
926 xfs_btree_setbuf(
927         xfs_btree_cur_t         *cur,   /* btree cursor */
928         int                     lev,    /* level in btree */
929         xfs_buf_t               *bp)    /* new buffer to set */
930 {
931         xfs_btree_block_t       *b;     /* btree block */
932         xfs_buf_t               *obp;   /* old buffer pointer */
933
934         obp = cur->bc_bufs[lev];
935         if (obp)
936                 xfs_trans_brelse(cur->bc_tp, obp);
937         cur->bc_bufs[lev] = bp;
938         cur->bc_ra[lev] = 0;
939         if (!bp)
940                 return;
941         b = XFS_BUF_TO_BLOCK(bp);
942         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
943                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
944                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
945                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
946                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
947         } else {
948                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
949                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
950                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
951                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
952         }
953 }
954
955 STATIC int
956 xfs_btree_ptr_is_null(
957         struct xfs_btree_cur    *cur,
958         union xfs_btree_ptr     *ptr)
959 {
960         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
961                 return be64_to_cpu(ptr->l) == NULLFSBLOCK;
962         else
963                 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
964 }
965
966 /*
967  * Get/set/init sibling pointers
968  */
969 STATIC void
970 xfs_btree_get_sibling(
971         struct xfs_btree_cur    *cur,
972         struct xfs_btree_block  *block,
973         union xfs_btree_ptr     *ptr,
974         int                     lr)
975 {
976         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
977
978         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
979                 if (lr == XFS_BB_RIGHTSIB)
980                         ptr->l = block->bb_u.l.bb_rightsib;
981                 else
982                         ptr->l = block->bb_u.l.bb_leftsib;
983         } else {
984                 if (lr == XFS_BB_RIGHTSIB)
985                         ptr->s = block->bb_u.s.bb_rightsib;
986                 else
987                         ptr->s = block->bb_u.s.bb_leftsib;
988         }
989 }
990
991 /*
992  * Return true if ptr is the last record in the btree and
993  * we need to track updateÑ• to this record.  The decision
994  * will be further refined in the update_lastrec method.
995  */
996 STATIC int
997 xfs_btree_is_lastrec(
998         struct xfs_btree_cur    *cur,
999         struct xfs_btree_block  *block,
1000         int                     level)
1001 {
1002         union xfs_btree_ptr     ptr;
1003
1004         if (level > 0)
1005                 return 0;
1006         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1007                 return 0;
1008
1009         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1010         if (!xfs_btree_ptr_is_null(cur, &ptr))
1011                 return 0;
1012         return 1;
1013 }
1014
1015 STATIC xfs_daddr_t
1016 xfs_btree_ptr_to_daddr(
1017         struct xfs_btree_cur    *cur,
1018         union xfs_btree_ptr     *ptr)
1019 {
1020         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1021                 ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
1022
1023                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
1024         } else {
1025                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
1026                 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
1027
1028                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
1029                                         be32_to_cpu(ptr->s));
1030         }
1031 }
1032
1033 STATIC void
1034 xfs_btree_set_refs(
1035         struct xfs_btree_cur    *cur,
1036         struct xfs_buf          *bp)
1037 {
1038         switch (cur->bc_btnum) {
1039         case XFS_BTNUM_BNO:
1040         case XFS_BTNUM_CNT:
1041                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
1042                 break;
1043         case XFS_BTNUM_INO:
1044                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
1045                 break;
1046         case XFS_BTNUM_BMAP:
1047                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1048                 break;
1049         default:
1050                 ASSERT(0);
1051         }
1052 }
1053
1054 /*
1055  * Read in the buffer at the given ptr and return the buffer and
1056  * the block pointer within the buffer.
1057  */
1058 STATIC int
1059 xfs_btree_read_buf_block(
1060         struct xfs_btree_cur    *cur,
1061         union xfs_btree_ptr     *ptr,
1062         int                     level,
1063         int                     flags,
1064         struct xfs_btree_block  **block,
1065         struct xfs_buf          **bpp)
1066 {
1067         struct xfs_mount        *mp = cur->bc_mp;
1068         xfs_daddr_t             d;
1069         int                     error;
1070
1071         /* need to sort out how callers deal with failures first */
1072         ASSERT(!(flags & XFS_BUF_TRYLOCK));
1073
1074         d = xfs_btree_ptr_to_daddr(cur, ptr);
1075         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1076                                    mp->m_bsize, flags, bpp);
1077         if (error)
1078                 return error;
1079
1080         ASSERT(*bpp != NULL);
1081         ASSERT(!XFS_BUF_GETERROR(*bpp));
1082
1083         xfs_btree_set_refs(cur, *bpp);
1084         *block = XFS_BUF_TO_BLOCK(*bpp);
1085
1086         error = xfs_btree_check_block(cur, *block, level, *bpp);
1087         if (error)
1088                 xfs_trans_brelse(cur->bc_tp, *bpp);
1089         return error;
1090 }
1091
1092 /*
1093  * Copy keys from one btree block to another.
1094  */
1095 STATIC void
1096 xfs_btree_copy_keys(
1097         struct xfs_btree_cur    *cur,
1098         union xfs_btree_key     *dst_key,
1099         union xfs_btree_key     *src_key,
1100         int                     numkeys)
1101 {
1102         ASSERT(numkeys >= 0);
1103         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1104 }
1105
1106 /*
1107  * Copy records from one btree block to another.
1108  */
1109 STATIC void
1110 xfs_btree_copy_recs(
1111         struct xfs_btree_cur    *cur,
1112         union xfs_btree_rec     *dst_rec,
1113         union xfs_btree_rec     *src_rec,
1114         int                     numrecs)
1115 {
1116         ASSERT(numrecs >= 0);
1117         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1118 }
1119
1120 /*
1121  * Log key values from the btree block.
1122  */
1123 STATIC void
1124 xfs_btree_log_keys(
1125         struct xfs_btree_cur    *cur,
1126         struct xfs_buf          *bp,
1127         int                     first,
1128         int                     last)
1129 {
1130         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1131         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1132
1133         if (bp) {
1134                 xfs_trans_log_buf(cur->bc_tp, bp,
1135                                   xfs_btree_key_offset(cur, first),
1136                                   xfs_btree_key_offset(cur, last + 1) - 1);
1137         } else {
1138                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1139                                 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1140         }
1141
1142         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1143 }
1144
1145 /*
1146  * Log record values from the btree block.
1147  */
1148 STATIC void
1149 xfs_btree_log_recs(
1150         struct xfs_btree_cur    *cur,
1151         struct xfs_buf          *bp,
1152         int                     first,
1153         int                     last)
1154 {
1155         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1156         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1157
1158         xfs_trans_log_buf(cur->bc_tp, bp,
1159                           xfs_btree_rec_offset(cur, first),
1160                           xfs_btree_rec_offset(cur, last + 1) - 1);
1161
1162         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1163 }
1164
1165 /*
1166  * Increment cursor by one record at the level.
1167  * For nonzero levels the leaf-ward information is untouched.
1168  */
1169 int                                             /* error */
1170 xfs_btree_increment(
1171         struct xfs_btree_cur    *cur,
1172         int                     level,
1173         int                     *stat)          /* success/failure */
1174 {
1175         struct xfs_btree_block  *block;
1176         union xfs_btree_ptr     ptr;
1177         struct xfs_buf          *bp;
1178         int                     error;          /* error return value */
1179         int                     lev;
1180
1181         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1182         XFS_BTREE_TRACE_ARGI(cur, level);
1183
1184         ASSERT(level < cur->bc_nlevels);
1185
1186         /* Read-ahead to the right at this level. */
1187         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1188
1189         /* Get a pointer to the btree block. */
1190         block = xfs_btree_get_block(cur, level, &bp);
1191
1192 #ifdef DEBUG
1193         error = xfs_btree_check_block(cur, block, level, bp);
1194         if (error)
1195                 goto error0;
1196 #endif
1197
1198         /* We're done if we remain in the block after the increment. */
1199         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1200                 goto out1;
1201
1202         /* Fail if we just went off the right edge of the tree. */
1203         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1204         if (xfs_btree_ptr_is_null(cur, &ptr))
1205                 goto out0;
1206
1207         XFS_BTREE_STATS_INC(cur, increment);
1208
1209         /*
1210          * March up the tree incrementing pointers.
1211          * Stop when we don't go off the right edge of a block.
1212          */
1213         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1214                 block = xfs_btree_get_block(cur, lev, &bp);
1215
1216 #ifdef DEBUG
1217                 error = xfs_btree_check_block(cur, block, lev, bp);
1218                 if (error)
1219                         goto error0;
1220 #endif
1221
1222                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1223                         break;
1224
1225                 /* Read-ahead the right block for the next loop. */
1226                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1227         }
1228
1229         /*
1230          * If we went off the root then we are either seriously
1231          * confused or have the tree root in an inode.
1232          */
1233         if (lev == cur->bc_nlevels) {
1234                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1235                         goto out0;
1236                 ASSERT(0);
1237                 error = EFSCORRUPTED;
1238                 goto error0;
1239         }
1240         ASSERT(lev < cur->bc_nlevels);
1241
1242         /*
1243          * Now walk back down the tree, fixing up the cursor's buffer
1244          * pointers and key numbers.
1245          */
1246         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1247                 union xfs_btree_ptr     *ptrp;
1248
1249                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1250                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1251                                                         0, &block, &bp);
1252                 if (error)
1253                         goto error0;
1254
1255                 xfs_btree_setbuf(cur, lev, bp);
1256                 cur->bc_ptrs[lev] = 1;
1257         }
1258 out1:
1259         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1260         *stat = 1;
1261         return 0;
1262
1263 out0:
1264         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1265         *stat = 0;
1266         return 0;
1267
1268 error0:
1269         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1270         return error;
1271 }
1272
1273 /*
1274  * Decrement cursor by one record at the level.
1275  * For nonzero levels the leaf-ward information is untouched.
1276  */
1277 int                                             /* error */
1278 xfs_btree_decrement(
1279         struct xfs_btree_cur    *cur,
1280         int                     level,
1281         int                     *stat)          /* success/failure */
1282 {
1283         struct xfs_btree_block  *block;
1284         xfs_buf_t               *bp;
1285         int                     error;          /* error return value */
1286         int                     lev;
1287         union xfs_btree_ptr     ptr;
1288
1289         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1290         XFS_BTREE_TRACE_ARGI(cur, level);
1291
1292         ASSERT(level < cur->bc_nlevels);
1293
1294         /* Read-ahead to the left at this level. */
1295         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1296
1297         /* We're done if we remain in the block after the decrement. */
1298         if (--cur->bc_ptrs[level] > 0)
1299                 goto out1;
1300
1301         /* Get a pointer to the btree block. */
1302         block = xfs_btree_get_block(cur, level, &bp);
1303
1304 #ifdef DEBUG
1305         error = xfs_btree_check_block(cur, block, level, bp);
1306         if (error)
1307                 goto error0;
1308 #endif
1309
1310         /* Fail if we just went off the left edge of the tree. */
1311         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1312         if (xfs_btree_ptr_is_null(cur, &ptr))
1313                 goto out0;
1314
1315         XFS_BTREE_STATS_INC(cur, decrement);
1316
1317         /*
1318          * March up the tree decrementing pointers.
1319          * Stop when we don't go off the left edge of a block.
1320          */
1321         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1322                 if (--cur->bc_ptrs[lev] > 0)
1323                         break;
1324                 /* Read-ahead the left block for the next loop. */
1325                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1326         }
1327
1328         /*
1329          * If we went off the root then we are seriously confused.
1330          * or the root of the tree is in an inode.
1331          */
1332         if (lev == cur->bc_nlevels) {
1333                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1334                         goto out0;
1335                 ASSERT(0);
1336                 error = EFSCORRUPTED;
1337                 goto error0;
1338         }
1339         ASSERT(lev < cur->bc_nlevels);
1340
1341         /*
1342          * Now walk back down the tree, fixing up the cursor's buffer
1343          * pointers and key numbers.
1344          */
1345         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1346                 union xfs_btree_ptr     *ptrp;
1347
1348                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1349                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1350                                                         0, &block, &bp);
1351                 if (error)
1352                         goto error0;
1353                 xfs_btree_setbuf(cur, lev, bp);
1354                 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1355         }
1356 out1:
1357         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1358         *stat = 1;
1359         return 0;
1360
1361 out0:
1362         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1363         *stat = 0;
1364         return 0;
1365
1366 error0:
1367         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1368         return error;
1369 }
1370
1371
1372 STATIC int
1373 xfs_btree_lookup_get_block(
1374         struct xfs_btree_cur    *cur,   /* btree cursor */
1375         int                     level,  /* level in the btree */
1376         union xfs_btree_ptr     *pp,    /* ptr to btree block */
1377         struct xfs_btree_block  **blkp) /* return btree block */
1378 {
1379         struct xfs_buf          *bp;    /* buffer pointer for btree block */
1380         int                     error = 0;
1381
1382         /* special case the root block if in an inode */
1383         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1384             (level == cur->bc_nlevels - 1)) {
1385                 *blkp = xfs_btree_get_iroot(cur);
1386                 return 0;
1387         }
1388
1389         /*
1390          * If the old buffer at this level for the disk address we are
1391          * looking for re-use it.
1392          *
1393          * Otherwise throw it away and get a new one.
1394          */
1395         bp = cur->bc_bufs[level];
1396         if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1397                 *blkp = XFS_BUF_TO_BLOCK(bp);
1398                 return 0;
1399         }
1400
1401         error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1402         if (error)
1403                 return error;
1404
1405         xfs_btree_setbuf(cur, level, bp);
1406         return 0;
1407 }
1408
1409 /*
1410  * Get current search key.  For level 0 we don't actually have a key
1411  * structure so we make one up from the record.  For all other levels
1412  * we just return the right key.
1413  */
1414 STATIC union xfs_btree_key *
1415 xfs_lookup_get_search_key(
1416         struct xfs_btree_cur    *cur,
1417         int                     level,
1418         int                     keyno,
1419         struct xfs_btree_block  *block,
1420         union xfs_btree_key     *kp)
1421 {
1422         if (level == 0) {
1423                 cur->bc_ops->init_key_from_rec(kp,
1424                                 xfs_btree_rec_addr(cur, keyno, block));
1425                 return kp;
1426         }
1427
1428         return xfs_btree_key_addr(cur, keyno, block);
1429 }
1430
1431 /*
1432  * Lookup the record.  The cursor is made to point to it, based on dir.
1433  * Return 0 if can't find any such record, 1 for success.
1434  */
1435 int                                     /* error */
1436 xfs_btree_lookup(
1437         struct xfs_btree_cur    *cur,   /* btree cursor */
1438         xfs_lookup_t            dir,    /* <=, ==, or >= */
1439         int                     *stat)  /* success/failure */
1440 {
1441         struct xfs_btree_block  *block; /* current btree block */
1442         __int64_t               diff;   /* difference for the current key */
1443         int                     error;  /* error return value */
1444         int                     keyno;  /* current key number */
1445         int                     level;  /* level in the btree */
1446         union xfs_btree_ptr     *pp;    /* ptr to btree block */
1447         union xfs_btree_ptr     ptr;    /* ptr to btree block */
1448
1449         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1450         XFS_BTREE_TRACE_ARGI(cur, dir);
1451
1452         XFS_BTREE_STATS_INC(cur, lookup);
1453
1454         block = NULL;
1455         keyno = 0;
1456
1457         /* initialise start pointer from cursor */
1458         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1459         pp = &ptr;
1460
1461         /*
1462          * Iterate over each level in the btree, starting at the root.
1463          * For each level above the leaves, find the key we need, based
1464          * on the lookup record, then follow the corresponding block
1465          * pointer down to the next level.
1466          */
1467         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1468                 /* Get the block we need to do the lookup on. */
1469                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1470                 if (error)
1471                         goto error0;
1472
1473                 if (diff == 0) {
1474                         /*
1475                          * If we already had a key match at a higher level, we
1476                          * know we need to use the first entry in this block.
1477                          */
1478                         keyno = 1;
1479                 } else {
1480                         /* Otherwise search this block. Do a binary search. */
1481
1482                         int     high;   /* high entry number */
1483                         int     low;    /* low entry number */
1484
1485                         /* Set low and high entry numbers, 1-based. */
1486                         low = 1;
1487                         high = xfs_btree_get_numrecs(block);
1488                         if (!high) {
1489                                 /* Block is empty, must be an empty leaf. */
1490                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1491
1492                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1493                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1494                                 *stat = 0;
1495                                 return 0;
1496                         }
1497
1498                         /* Binary search the block. */
1499                         while (low <= high) {
1500                                 union xfs_btree_key     key;
1501                                 union xfs_btree_key     *kp;
1502
1503                                 XFS_BTREE_STATS_INC(cur, compare);
1504
1505                                 /* keyno is average of low and high. */
1506                                 keyno = (low + high) >> 1;
1507
1508                                 /* Get current search key */
1509                                 kp = xfs_lookup_get_search_key(cur, level,
1510                                                 keyno, block, &key);
1511
1512                                 /*
1513                                  * Compute difference to get next direction:
1514                                  *  - less than, move right
1515                                  *  - greater than, move left
1516                                  *  - equal, we're done
1517                                  */
1518                                 diff = cur->bc_ops->key_diff(cur, kp);
1519                                 if (diff < 0)
1520                                         low = keyno + 1;
1521                                 else if (diff > 0)
1522                                         high = keyno - 1;
1523                                 else
1524                                         break;
1525                         }
1526                 }
1527
1528                 /*
1529                  * If there are more levels, set up for the next level
1530                  * by getting the block number and filling in the cursor.
1531                  */
1532                 if (level > 0) {
1533                         /*
1534                          * If we moved left, need the previous key number,
1535                          * unless there isn't one.
1536                          */
1537                         if (diff > 0 && --keyno < 1)
1538                                 keyno = 1;
1539                         pp = xfs_btree_ptr_addr(cur, keyno, block);
1540
1541 #ifdef DEBUG
1542                         error = xfs_btree_check_ptr(cur, pp, 0, level);
1543                         if (error)
1544                                 goto error0;
1545 #endif
1546                         cur->bc_ptrs[level] = keyno;
1547                 }
1548         }
1549
1550         /* Done with the search. See if we need to adjust the results. */
1551         if (dir != XFS_LOOKUP_LE && diff < 0) {
1552                 keyno++;
1553                 /*
1554                  * If ge search and we went off the end of the block, but it's
1555                  * not the last block, we're in the wrong block.
1556                  */
1557                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1558                 if (dir == XFS_LOOKUP_GE &&
1559                     keyno > xfs_btree_get_numrecs(block) &&
1560                     !xfs_btree_ptr_is_null(cur, &ptr)) {
1561                         int     i;
1562
1563                         cur->bc_ptrs[0] = keyno;
1564                         error = xfs_btree_increment(cur, 0, &i);
1565                         if (error)
1566                                 goto error0;
1567                         XFS_WANT_CORRUPTED_RETURN(i == 1);
1568                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1569                         *stat = 1;
1570                         return 0;
1571                 }
1572         } else if (dir == XFS_LOOKUP_LE && diff > 0)
1573                 keyno--;
1574         cur->bc_ptrs[0] = keyno;
1575
1576         /* Return if we succeeded or not. */
1577         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1578                 *stat = 0;
1579         else if (dir != XFS_LOOKUP_EQ || diff == 0)
1580                 *stat = 1;
1581         else
1582                 *stat = 0;
1583         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1584         return 0;
1585
1586 error0:
1587         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1588         return error;
1589 }
1590
1591 /*
1592  * Update keys at all levels from here to the root along the cursor's path.
1593  */
1594 int
1595 xfs_btree_updkey(
1596         struct xfs_btree_cur    *cur,
1597         union xfs_btree_key     *keyp,
1598         int                     level)
1599 {
1600         struct xfs_btree_block  *block;
1601         struct xfs_buf          *bp;
1602         union xfs_btree_key     *kp;
1603         int                     ptr;
1604
1605         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1606         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1607
1608         ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1609
1610         /*
1611          * Go up the tree from this level toward the root.
1612          * At each level, update the key value to the value input.
1613          * Stop when we reach a level where the cursor isn't pointing
1614          * at the first entry in the block.
1615          */
1616         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1617 #ifdef DEBUG
1618                 int             error;
1619 #endif
1620                 block = xfs_btree_get_block(cur, level, &bp);
1621 #ifdef DEBUG
1622                 error = xfs_btree_check_block(cur, block, level, bp);
1623                 if (error) {
1624                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1625                         return error;
1626                 }
1627 #endif
1628                 ptr = cur->bc_ptrs[level];
1629                 kp = xfs_btree_key_addr(cur, ptr, block);
1630                 xfs_btree_copy_keys(cur, kp, keyp, 1);
1631                 xfs_btree_log_keys(cur, bp, ptr, ptr);
1632         }
1633
1634         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1635         return 0;
1636 }
1637
1638 /*
1639  * Update the record referred to by cur to the value in the
1640  * given record. This either works (return 0) or gets an
1641  * EFSCORRUPTED error.
1642  */
1643 int
1644 xfs_btree_update(
1645         struct xfs_btree_cur    *cur,
1646         union xfs_btree_rec     *rec)
1647 {
1648         struct xfs_btree_block  *block;
1649         struct xfs_buf          *bp;
1650         int                     error;
1651         int                     ptr;
1652         union xfs_btree_rec     *rp;
1653
1654         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1655         XFS_BTREE_TRACE_ARGR(cur, rec);
1656
1657         /* Pick up the current block. */
1658         block = xfs_btree_get_block(cur, 0, &bp);
1659
1660 #ifdef DEBUG
1661         error = xfs_btree_check_block(cur, block, 0, bp);
1662         if (error)
1663                 goto error0;
1664 #endif
1665         /* Get the address of the rec to be updated. */
1666         ptr = cur->bc_ptrs[0];
1667         rp = xfs_btree_rec_addr(cur, ptr, block);
1668
1669         /* Fill in the new contents and log them. */
1670         xfs_btree_copy_recs(cur, rp, rec, 1);
1671         xfs_btree_log_recs(cur, bp, ptr, ptr);
1672
1673         /*
1674          * If we are tracking the last record in the tree and
1675          * we are at the far right edge of the tree, update it.
1676          */
1677         if (xfs_btree_is_lastrec(cur, block, 0)) {
1678                 cur->bc_ops->update_lastrec(cur, block, rec,
1679                                             ptr, LASTREC_UPDATE);
1680         }
1681
1682         /* Updating first rec in leaf. Pass new key value up to our parent. */
1683         if (ptr == 1) {
1684                 union xfs_btree_key     key;
1685
1686                 cur->bc_ops->init_key_from_rec(&key, rec);
1687                 error = xfs_btree_updkey(cur, &key, 1);
1688                 if (error)
1689                         goto error0;
1690         }
1691
1692         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1693         return 0;
1694
1695 error0:
1696         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1697         return error;
1698 }
1699