[XFS] implement generic xfs_btree_increment
[safe/jmp/linux-2.6] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_btree_trace.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_error.h"
41
42 /*
43  * Cursor allocation zone.
44  */
45 kmem_zone_t     *xfs_btree_cur_zone;
46
47 /*
48  * Btree magic numbers.
49  */
50 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
51         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
52 };
53
54 /*
55  * External routines.
56  */
57
58 #ifdef DEBUG
59 /*
60  * Debug routine: check that keys are in the right order.
61  */
62 void
63 xfs_btree_check_key(
64         xfs_btnum_t     btnum,          /* btree identifier */
65         void            *ak1,           /* pointer to left (lower) key */
66         void            *ak2)           /* pointer to right (higher) key */
67 {
68         switch (btnum) {
69         case XFS_BTNUM_BNO: {
70                 xfs_alloc_key_t *k1;
71                 xfs_alloc_key_t *k2;
72
73                 k1 = ak1;
74                 k2 = ak2;
75                 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
76                 break;
77             }
78         case XFS_BTNUM_CNT: {
79                 xfs_alloc_key_t *k1;
80                 xfs_alloc_key_t *k2;
81
82                 k1 = ak1;
83                 k2 = ak2;
84                 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
85                        (k1->ar_blockcount == k2->ar_blockcount &&
86                         be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
87                 break;
88             }
89         case XFS_BTNUM_BMAP: {
90                 xfs_bmbt_key_t  *k1;
91                 xfs_bmbt_key_t  *k2;
92
93                 k1 = ak1;
94                 k2 = ak2;
95                 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
96                 break;
97             }
98         case XFS_BTNUM_INO: {
99                 xfs_inobt_key_t *k1;
100                 xfs_inobt_key_t *k2;
101
102                 k1 = ak1;
103                 k2 = ak2;
104                 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
105                 break;
106             }
107         default:
108                 ASSERT(0);
109         }
110 }
111
112 /*
113  * Debug routine: check that records are in the right order.
114  */
115 void
116 xfs_btree_check_rec(
117         xfs_btnum_t     btnum,          /* btree identifier */
118         void            *ar1,           /* pointer to left (lower) record */
119         void            *ar2)           /* pointer to right (higher) record */
120 {
121         switch (btnum) {
122         case XFS_BTNUM_BNO: {
123                 xfs_alloc_rec_t *r1;
124                 xfs_alloc_rec_t *r2;
125
126                 r1 = ar1;
127                 r2 = ar2;
128                 ASSERT(be32_to_cpu(r1->ar_startblock) +
129                        be32_to_cpu(r1->ar_blockcount) <=
130                        be32_to_cpu(r2->ar_startblock));
131                 break;
132             }
133         case XFS_BTNUM_CNT: {
134                 xfs_alloc_rec_t *r1;
135                 xfs_alloc_rec_t *r2;
136
137                 r1 = ar1;
138                 r2 = ar2;
139                 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
140                        (r1->ar_blockcount == r2->ar_blockcount &&
141                         be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
142                 break;
143             }
144         case XFS_BTNUM_BMAP: {
145                 xfs_bmbt_rec_t  *r1;
146                 xfs_bmbt_rec_t  *r2;
147
148                 r1 = ar1;
149                 r2 = ar2;
150                 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
151                        xfs_bmbt_disk_get_blockcount(r1) <=
152                        xfs_bmbt_disk_get_startoff(r2));
153                 break;
154             }
155         case XFS_BTNUM_INO: {
156                 xfs_inobt_rec_t *r1;
157                 xfs_inobt_rec_t *r2;
158
159                 r1 = ar1;
160                 r2 = ar2;
161                 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
162                        be32_to_cpu(r2->ir_startino));
163                 break;
164             }
165         default:
166                 ASSERT(0);
167         }
168 }
169 #endif  /* DEBUG */
170
171 int                                     /* error (0 or EFSCORRUPTED) */
172 xfs_btree_check_lblock(
173         struct xfs_btree_cur    *cur,   /* btree cursor */
174         struct xfs_btree_lblock *block, /* btree long form block pointer */
175         int                     level,  /* level of the btree block */
176         struct xfs_buf          *bp)    /* buffer for block, if any */
177 {
178         int                     lblock_ok; /* block passes checks */
179         struct xfs_mount        *mp;    /* file system mount point */
180
181         mp = cur->bc_mp;
182         lblock_ok =
183                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
184                 be16_to_cpu(block->bb_level) == level &&
185                 be16_to_cpu(block->bb_numrecs) <=
186                         cur->bc_ops->get_maxrecs(cur, level) &&
187                 block->bb_leftsib &&
188                 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
189                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
190                 block->bb_rightsib &&
191                 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
192                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
193         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
194                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
195                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
196                 if (bp)
197                         xfs_buftrace("LBTREE ERROR", bp);
198                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
199                                  mp);
200                 return XFS_ERROR(EFSCORRUPTED);
201         }
202         return 0;
203 }
204
205 int                                     /* error (0 or EFSCORRUPTED) */
206 xfs_btree_check_sblock(
207         struct xfs_btree_cur    *cur,   /* btree cursor */
208         struct xfs_btree_sblock *block, /* btree short form block pointer */
209         int                     level,  /* level of the btree block */
210         struct xfs_buf          *bp)    /* buffer containing block */
211 {
212         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
213         struct xfs_agf          *agf;   /* ag. freespace structure */
214         xfs_agblock_t           agflen; /* native ag. freespace length */
215         int                     sblock_ok; /* block passes checks */
216
217         agbp = cur->bc_private.a.agbp;
218         agf = XFS_BUF_TO_AGF(agbp);
219         agflen = be32_to_cpu(agf->agf_length);
220         sblock_ok =
221                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
222                 be16_to_cpu(block->bb_level) == level &&
223                 be16_to_cpu(block->bb_numrecs) <=
224                         cur->bc_ops->get_maxrecs(cur, level) &&
225                 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
226                  be32_to_cpu(block->bb_leftsib) < agflen) &&
227                 block->bb_leftsib &&
228                 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
229                  be32_to_cpu(block->bb_rightsib) < agflen) &&
230                 block->bb_rightsib;
231         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
232                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
233                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
234                 if (bp)
235                         xfs_buftrace("SBTREE ERROR", bp);
236                 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
237                                  cur->bc_mp);
238                 return XFS_ERROR(EFSCORRUPTED);
239         }
240         return 0;
241 }
242
243 /*
244  * Debug routine: check that block header is ok.
245  */
246 int
247 xfs_btree_check_block(
248         struct xfs_btree_cur    *cur,   /* btree cursor */
249         struct xfs_btree_block  *block, /* generic btree block pointer */
250         int                     level,  /* level of the btree block */
251         struct xfs_buf          *bp)    /* buffer containing block, if any */
252 {
253         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
254                 return xfs_btree_check_lblock(cur,
255                                 (struct xfs_btree_lblock *)block, level, bp);
256         } else {
257                 return xfs_btree_check_sblock(cur,
258                                 (struct xfs_btree_sblock *)block, level, bp);
259         }
260 }
261
262 /*
263  * Check that (long) pointer is ok.
264  */
265 int                                     /* error (0 or EFSCORRUPTED) */
266 xfs_btree_check_lptr(
267         struct xfs_btree_cur    *cur,   /* btree cursor */
268         xfs_dfsbno_t            bno,    /* btree block disk address */
269         int                     level)  /* btree block level */
270 {
271         XFS_WANT_CORRUPTED_RETURN(
272                 level > 0 &&
273                 bno != NULLDFSBNO &&
274                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
275         return 0;
276 }
277
278 /*
279  * Check that (short) pointer is ok.
280  */
281 int                                     /* error (0 or EFSCORRUPTED) */
282 xfs_btree_check_sptr(
283         struct xfs_btree_cur    *cur,   /* btree cursor */
284         xfs_agblock_t           bno,    /* btree block disk address */
285         int                     level)  /* btree block level */
286 {
287         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
288
289         XFS_WANT_CORRUPTED_RETURN(
290                 level > 0 &&
291                 bno != NULLAGBLOCK &&
292                 bno != 0 &&
293                 bno < agblocks);
294         return 0;
295 }
296
297 /*
298  * Check that block ptr is ok.
299  */
300 int                                     /* error (0 or EFSCORRUPTED) */
301 xfs_btree_check_ptr(
302         struct xfs_btree_cur    *cur,   /* btree cursor */
303         union xfs_btree_ptr     *ptr,   /* btree block disk address */
304         int                     index,  /* offset from ptr to check */
305         int                     level)  /* btree block level */
306 {
307         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
308                 return xfs_btree_check_lptr(cur,
309                                 be64_to_cpu((&ptr->l)[index]), level);
310         } else {
311                 return xfs_btree_check_sptr(cur,
312                                 be32_to_cpu((&ptr->s)[index]), level);
313         }
314 }
315
316 /*
317  * Delete the btree cursor.
318  */
319 void
320 xfs_btree_del_cursor(
321         xfs_btree_cur_t *cur,           /* btree cursor */
322         int             error)          /* del because of error */
323 {
324         int             i;              /* btree level */
325
326         /*
327          * Clear the buffer pointers, and release the buffers.
328          * If we're doing this in the face of an error, we
329          * need to make sure to inspect all of the entries
330          * in the bc_bufs array for buffers to be unlocked.
331          * This is because some of the btree code works from
332          * level n down to 0, and if we get an error along
333          * the way we won't have initialized all the entries
334          * down to 0.
335          */
336         for (i = 0; i < cur->bc_nlevels; i++) {
337                 if (cur->bc_bufs[i])
338                         xfs_btree_setbuf(cur, i, NULL);
339                 else if (!error)
340                         break;
341         }
342         /*
343          * Can't free a bmap cursor without having dealt with the
344          * allocated indirect blocks' accounting.
345          */
346         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
347                cur->bc_private.b.allocated == 0);
348         /*
349          * Free the cursor.
350          */
351         kmem_zone_free(xfs_btree_cur_zone, cur);
352 }
353
354 /*
355  * Duplicate the btree cursor.
356  * Allocate a new one, copy the record, re-get the buffers.
357  */
358 int                                     /* error */
359 xfs_btree_dup_cursor(
360         xfs_btree_cur_t *cur,           /* input cursor */
361         xfs_btree_cur_t **ncur)         /* output cursor */
362 {
363         xfs_buf_t       *bp;            /* btree block's buffer pointer */
364         int             error;          /* error return value */
365         int             i;              /* level number of btree block */
366         xfs_mount_t     *mp;            /* mount structure for filesystem */
367         xfs_btree_cur_t *new;           /* new cursor value */
368         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
369
370         tp = cur->bc_tp;
371         mp = cur->bc_mp;
372
373         /*
374          * Allocate a new cursor like the old one.
375          */
376         new = cur->bc_ops->dup_cursor(cur);
377
378         /*
379          * Copy the record currently in the cursor.
380          */
381         new->bc_rec = cur->bc_rec;
382
383         /*
384          * For each level current, re-get the buffer and copy the ptr value.
385          */
386         for (i = 0; i < new->bc_nlevels; i++) {
387                 new->bc_ptrs[i] = cur->bc_ptrs[i];
388                 new->bc_ra[i] = cur->bc_ra[i];
389                 if ((bp = cur->bc_bufs[i])) {
390                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
391                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
392                                 xfs_btree_del_cursor(new, error);
393                                 *ncur = NULL;
394                                 return error;
395                         }
396                         new->bc_bufs[i] = bp;
397                         ASSERT(bp);
398                         ASSERT(!XFS_BUF_GETERROR(bp));
399                 } else
400                         new->bc_bufs[i] = NULL;
401         }
402         *ncur = new;
403         return 0;
404 }
405
406 /*
407  * XFS btree block layout and addressing:
408  *
409  * There are two types of blocks in the btree: leaf and non-leaf blocks.
410  *
411  * The leaf record start with a header then followed by records containing
412  * the values.  A non-leaf block also starts with the same header, and
413  * then first contains lookup keys followed by an equal number of pointers
414  * to the btree blocks at the previous level.
415  *
416  *              +--------+-------+-------+-------+-------+-------+-------+
417  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
418  *              +--------+-------+-------+-------+-------+-------+-------+
419  *
420  *              +--------+-------+-------+-------+-------+-------+-------+
421  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
422  *              +--------+-------+-------+-------+-------+-------+-------+
423  *
424  * The header is called struct xfs_btree_block for reasons better left unknown
425  * and comes in different versions for short (32bit) and long (64bit) block
426  * pointers.  The record and key structures are defined by the btree instances
427  * and opaque to the btree core.  The block pointers are simple disk endian
428  * integers, available in a short (32bit) and long (64bit) variant.
429  *
430  * The helpers below calculate the offset of a given record, key or pointer
431  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
432  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
433  * inside the btree block is done using indices starting at one, not zero!
434  */
435
436 /*
437  * Return size of the btree block header for this btree instance.
438  */
439 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
440 {
441         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
442                 sizeof(struct xfs_btree_lblock) :
443                 sizeof(struct xfs_btree_sblock);
444 }
445
446 /*
447  * Return size of btree block pointers for this btree instance.
448  */
449 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
450 {
451         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
452                 sizeof(__be64) : sizeof(__be32);
453 }
454
455 /*
456  * Calculate offset of the n-th record in a btree block.
457  */
458 STATIC size_t
459 xfs_btree_rec_offset(
460         struct xfs_btree_cur    *cur,
461         int                     n)
462 {
463         return xfs_btree_block_len(cur) +
464                 (n - 1) * cur->bc_ops->rec_len;
465 }
466
467 /*
468  * Calculate offset of the n-th key in a btree block.
469  */
470 STATIC size_t
471 xfs_btree_key_offset(
472         struct xfs_btree_cur    *cur,
473         int                     n)
474 {
475         return xfs_btree_block_len(cur) +
476                 (n - 1) * cur->bc_ops->key_len;
477 }
478
479 /*
480  * Calculate offset of the n-th block pointer in a btree block.
481  */
482 STATIC size_t
483 xfs_btree_ptr_offset(
484         struct xfs_btree_cur    *cur,
485         int                     n,
486         int                     level)
487 {
488         return xfs_btree_block_len(cur) +
489                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
490                 (n - 1) * xfs_btree_ptr_len(cur);
491 }
492
493 /*
494  * Return a pointer to the n-th record in the btree block.
495  */
496 STATIC union xfs_btree_rec *
497 xfs_btree_rec_addr(
498         struct xfs_btree_cur    *cur,
499         int                     n,
500         struct xfs_btree_block  *block)
501 {
502         return (union xfs_btree_rec *)
503                 ((char *)block + xfs_btree_rec_offset(cur, n));
504 }
505
506 /*
507  * Return a pointer to the n-th key in the btree block.
508  */
509 STATIC union xfs_btree_key *
510 xfs_btree_key_addr(
511         struct xfs_btree_cur    *cur,
512         int                     n,
513         struct xfs_btree_block  *block)
514 {
515         return (union xfs_btree_key *)
516                 ((char *)block + xfs_btree_key_offset(cur, n));
517 }
518
519 /*
520  * Return a pointer to the n-th block pointer in the btree block.
521  */
522 STATIC union xfs_btree_ptr *
523 xfs_btree_ptr_addr(
524         struct xfs_btree_cur    *cur,
525         int                     n,
526         struct xfs_btree_block  *block)
527 {
528         int                     level = xfs_btree_get_level(block);
529
530         ASSERT(block->bb_level != 0);
531
532         return (union xfs_btree_ptr *)
533                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
534 }
535
536 /*
537  * Get a the root block which is stored in the inode.
538  *
539  * For now this btree implementation assumes the btree root is always
540  * stored in the if_broot field of an inode fork.
541  */
542 STATIC struct xfs_btree_block *
543 xfs_btree_get_iroot(
544        struct xfs_btree_cur    *cur)
545 {
546        struct xfs_ifork        *ifp;
547
548        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
549        return (struct xfs_btree_block *)ifp->if_broot;
550 }
551
552 /*
553  * Retrieve the block pointer from the cursor at the given level.
554  * This may be an inode btree root or from a buffer.
555  */
556 STATIC struct xfs_btree_block *         /* generic btree block pointer */
557 xfs_btree_get_block(
558         struct xfs_btree_cur    *cur,   /* btree cursor */
559         int                     level,  /* level in btree */
560         struct xfs_buf          **bpp)  /* buffer containing the block */
561 {
562         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
563             (level == cur->bc_nlevels - 1)) {
564                 *bpp = NULL;
565                 return xfs_btree_get_iroot(cur);
566         }
567
568         *bpp = cur->bc_bufs[level];
569         return XFS_BUF_TO_BLOCK(*bpp);
570 }
571
572 /*
573  * Get a buffer for the block, return it with no data read.
574  * Long-form addressing.
575  */
576 xfs_buf_t *                             /* buffer for fsbno */
577 xfs_btree_get_bufl(
578         xfs_mount_t     *mp,            /* file system mount point */
579         xfs_trans_t     *tp,            /* transaction pointer */
580         xfs_fsblock_t   fsbno,          /* file system block number */
581         uint            lock)           /* lock flags for get_buf */
582 {
583         xfs_buf_t       *bp;            /* buffer pointer (return value) */
584         xfs_daddr_t             d;              /* real disk block address */
585
586         ASSERT(fsbno != NULLFSBLOCK);
587         d = XFS_FSB_TO_DADDR(mp, fsbno);
588         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
589         ASSERT(bp);
590         ASSERT(!XFS_BUF_GETERROR(bp));
591         return bp;
592 }
593
594 /*
595  * Get a buffer for the block, return it with no data read.
596  * Short-form addressing.
597  */
598 xfs_buf_t *                             /* buffer for agno/agbno */
599 xfs_btree_get_bufs(
600         xfs_mount_t     *mp,            /* file system mount point */
601         xfs_trans_t     *tp,            /* transaction pointer */
602         xfs_agnumber_t  agno,           /* allocation group number */
603         xfs_agblock_t   agbno,          /* allocation group block number */
604         uint            lock)           /* lock flags for get_buf */
605 {
606         xfs_buf_t       *bp;            /* buffer pointer (return value) */
607         xfs_daddr_t             d;              /* real disk block address */
608
609         ASSERT(agno != NULLAGNUMBER);
610         ASSERT(agbno != NULLAGBLOCK);
611         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
612         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
613         ASSERT(bp);
614         ASSERT(!XFS_BUF_GETERROR(bp));
615         return bp;
616 }
617
618 /*
619  * Check for the cursor referring to the last block at the given level.
620  */
621 int                                     /* 1=is last block, 0=not last block */
622 xfs_btree_islastblock(
623         xfs_btree_cur_t         *cur,   /* btree cursor */
624         int                     level)  /* level to check */
625 {
626         xfs_btree_block_t       *block; /* generic btree block pointer */
627         xfs_buf_t               *bp;    /* buffer containing block */
628
629         block = xfs_btree_get_block(cur, level, &bp);
630         xfs_btree_check_block(cur, block, level, bp);
631         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
632                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
633         else
634                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
635 }
636
637 /*
638  * Change the cursor to point to the first record at the given level.
639  * Other levels are unaffected.
640  */
641 int                                     /* success=1, failure=0 */
642 xfs_btree_firstrec(
643         xfs_btree_cur_t         *cur,   /* btree cursor */
644         int                     level)  /* level to change */
645 {
646         xfs_btree_block_t       *block; /* generic btree block pointer */
647         xfs_buf_t               *bp;    /* buffer containing block */
648
649         /*
650          * Get the block pointer for this level.
651          */
652         block = xfs_btree_get_block(cur, level, &bp);
653         xfs_btree_check_block(cur, block, level, bp);
654         /*
655          * It's empty, there is no such record.
656          */
657         if (!block->bb_numrecs)
658                 return 0;
659         /*
660          * Set the ptr value to 1, that's the first record/key.
661          */
662         cur->bc_ptrs[level] = 1;
663         return 1;
664 }
665
666 /*
667  * Change the cursor to point to the last record in the current block
668  * at the given level.  Other levels are unaffected.
669  */
670 int                                     /* success=1, failure=0 */
671 xfs_btree_lastrec(
672         xfs_btree_cur_t         *cur,   /* btree cursor */
673         int                     level)  /* level to change */
674 {
675         xfs_btree_block_t       *block; /* generic btree block pointer */
676         xfs_buf_t               *bp;    /* buffer containing block */
677
678         /*
679          * Get the block pointer for this level.
680          */
681         block = xfs_btree_get_block(cur, level, &bp);
682         xfs_btree_check_block(cur, block, level, bp);
683         /*
684          * It's empty, there is no such record.
685          */
686         if (!block->bb_numrecs)
687                 return 0;
688         /*
689          * Set the ptr value to numrecs, that's the last record/key.
690          */
691         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
692         return 1;
693 }
694
695 /*
696  * Compute first and last byte offsets for the fields given.
697  * Interprets the offsets table, which contains struct field offsets.
698  */
699 void
700 xfs_btree_offsets(
701         __int64_t       fields,         /* bitmask of fields */
702         const short     *offsets,       /* table of field offsets */
703         int             nbits,          /* number of bits to inspect */
704         int             *first,         /* output: first byte offset */
705         int             *last)          /* output: last byte offset */
706 {
707         int             i;              /* current bit number */
708         __int64_t       imask;          /* mask for current bit number */
709
710         ASSERT(fields != 0);
711         /*
712          * Find the lowest bit, so the first byte offset.
713          */
714         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
715                 if (imask & fields) {
716                         *first = offsets[i];
717                         break;
718                 }
719         }
720         /*
721          * Find the highest bit, so the last byte offset.
722          */
723         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
724                 if (imask & fields) {
725                         *last = offsets[i + 1] - 1;
726                         break;
727                 }
728         }
729 }
730
731 /*
732  * Get a buffer for the block, return it read in.
733  * Long-form addressing.
734  */
735 int                                     /* error */
736 xfs_btree_read_bufl(
737         xfs_mount_t     *mp,            /* file system mount point */
738         xfs_trans_t     *tp,            /* transaction pointer */
739         xfs_fsblock_t   fsbno,          /* file system block number */
740         uint            lock,           /* lock flags for read_buf */
741         xfs_buf_t       **bpp,          /* buffer for fsbno */
742         int             refval)         /* ref count value for buffer */
743 {
744         xfs_buf_t       *bp;            /* return value */
745         xfs_daddr_t             d;              /* real disk block address */
746         int             error;
747
748         ASSERT(fsbno != NULLFSBLOCK);
749         d = XFS_FSB_TO_DADDR(mp, fsbno);
750         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
751                         mp->m_bsize, lock, &bp))) {
752                 return error;
753         }
754         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
755         if (bp != NULL) {
756                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
757         }
758         *bpp = bp;
759         return 0;
760 }
761
762 /*
763  * Get a buffer for the block, return it read in.
764  * Short-form addressing.
765  */
766 int                                     /* error */
767 xfs_btree_read_bufs(
768         xfs_mount_t     *mp,            /* file system mount point */
769         xfs_trans_t     *tp,            /* transaction pointer */
770         xfs_agnumber_t  agno,           /* allocation group number */
771         xfs_agblock_t   agbno,          /* allocation group block number */
772         uint            lock,           /* lock flags for read_buf */
773         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
774         int             refval)         /* ref count value for buffer */
775 {
776         xfs_buf_t       *bp;            /* return value */
777         xfs_daddr_t     d;              /* real disk block address */
778         int             error;
779
780         ASSERT(agno != NULLAGNUMBER);
781         ASSERT(agbno != NULLAGBLOCK);
782         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
783         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
784                                         mp->m_bsize, lock, &bp))) {
785                 return error;
786         }
787         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
788         if (bp != NULL) {
789                 switch (refval) {
790                 case XFS_ALLOC_BTREE_REF:
791                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
792                         break;
793                 case XFS_INO_BTREE_REF:
794                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
795                         break;
796                 }
797         }
798         *bpp = bp;
799         return 0;
800 }
801
802 /*
803  * Read-ahead the block, don't wait for it, don't return a buffer.
804  * Long-form addressing.
805  */
806 /* ARGSUSED */
807 void
808 xfs_btree_reada_bufl(
809         xfs_mount_t     *mp,            /* file system mount point */
810         xfs_fsblock_t   fsbno,          /* file system block number */
811         xfs_extlen_t    count)          /* count of filesystem blocks */
812 {
813         xfs_daddr_t             d;
814
815         ASSERT(fsbno != NULLFSBLOCK);
816         d = XFS_FSB_TO_DADDR(mp, fsbno);
817         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
818 }
819
820 /*
821  * Read-ahead the block, don't wait for it, don't return a buffer.
822  * Short-form addressing.
823  */
824 /* ARGSUSED */
825 void
826 xfs_btree_reada_bufs(
827         xfs_mount_t     *mp,            /* file system mount point */
828         xfs_agnumber_t  agno,           /* allocation group number */
829         xfs_agblock_t   agbno,          /* allocation group block number */
830         xfs_extlen_t    count)          /* count of filesystem blocks */
831 {
832         xfs_daddr_t             d;
833
834         ASSERT(agno != NULLAGNUMBER);
835         ASSERT(agbno != NULLAGBLOCK);
836         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
837         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
838 }
839
840 STATIC int
841 xfs_btree_readahead_lblock(
842         struct xfs_btree_cur    *cur,
843         int                     lr,
844         struct xfs_btree_block  *block)
845 {
846         int                     rval = 0;
847         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
848         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
849
850         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
851                 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
852                 rval++;
853         }
854
855         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
856                 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
857                 rval++;
858         }
859
860         return rval;
861 }
862
863 STATIC int
864 xfs_btree_readahead_sblock(
865         struct xfs_btree_cur    *cur,
866         int                     lr,
867         struct xfs_btree_block *block)
868 {
869         int                     rval = 0;
870         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
871         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
872
873
874         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
875                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
876                                      left, 1);
877                 rval++;
878         }
879
880         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
881                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
882                                      right, 1);
883                 rval++;
884         }
885
886         return rval;
887 }
888
889 /*
890  * Read-ahead btree blocks, at the given level.
891  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
892  */
893 int
894 xfs_btree_readahead(
895         struct xfs_btree_cur    *cur,           /* btree cursor */
896         int                     lev,            /* level in btree */
897         int                     lr)             /* left/right bits */
898 {
899         struct xfs_btree_block  *block;
900
901         /*
902          * No readahead needed if we are at the root level and the
903          * btree root is stored in the inode.
904          */
905         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
906             (lev == cur->bc_nlevels - 1))
907                 return 0;
908
909         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
910                 return 0;
911
912         cur->bc_ra[lev] |= lr;
913         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
914
915         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
916                 return xfs_btree_readahead_lblock(cur, lr, block);
917         return xfs_btree_readahead_sblock(cur, lr, block);
918 }
919
920 /*
921  * Set the buffer for level "lev" in the cursor to bp, releasing
922  * any previous buffer.
923  */
924 void
925 xfs_btree_setbuf(
926         xfs_btree_cur_t         *cur,   /* btree cursor */
927         int                     lev,    /* level in btree */
928         xfs_buf_t               *bp)    /* new buffer to set */
929 {
930         xfs_btree_block_t       *b;     /* btree block */
931         xfs_buf_t               *obp;   /* old buffer pointer */
932
933         obp = cur->bc_bufs[lev];
934         if (obp)
935                 xfs_trans_brelse(cur->bc_tp, obp);
936         cur->bc_bufs[lev] = bp;
937         cur->bc_ra[lev] = 0;
938         if (!bp)
939                 return;
940         b = XFS_BUF_TO_BLOCK(bp);
941         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
942                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
943                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
944                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
945                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
946         } else {
947                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
948                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
949                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
950                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
951         }
952 }
953
954 STATIC int
955 xfs_btree_ptr_is_null(
956         struct xfs_btree_cur    *cur,
957         union xfs_btree_ptr     *ptr)
958 {
959         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
960                 return be64_to_cpu(ptr->l) == NULLFSBLOCK;
961         else
962                 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
963 }
964
965 /*
966  * Get/set/init sibling pointers
967  */
968 STATIC void
969 xfs_btree_get_sibling(
970         struct xfs_btree_cur    *cur,
971         struct xfs_btree_block  *block,
972         union xfs_btree_ptr     *ptr,
973         int                     lr)
974 {
975         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
976
977         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
978                 if (lr == XFS_BB_RIGHTSIB)
979                         ptr->l = block->bb_u.l.bb_rightsib;
980                 else
981                         ptr->l = block->bb_u.l.bb_leftsib;
982         } else {
983                 if (lr == XFS_BB_RIGHTSIB)
984                         ptr->s = block->bb_u.s.bb_rightsib;
985                 else
986                         ptr->s = block->bb_u.s.bb_leftsib;
987         }
988 }
989
990 STATIC xfs_daddr_t
991 xfs_btree_ptr_to_daddr(
992         struct xfs_btree_cur    *cur,
993         union xfs_btree_ptr     *ptr)
994 {
995         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
996                 ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
997
998                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
999         } else {
1000                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
1001                 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
1002
1003                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
1004                                         be32_to_cpu(ptr->s));
1005         }
1006 }
1007
1008 STATIC void
1009 xfs_btree_set_refs(
1010         struct xfs_btree_cur    *cur,
1011         struct xfs_buf          *bp)
1012 {
1013         switch (cur->bc_btnum) {
1014         case XFS_BTNUM_BNO:
1015         case XFS_BTNUM_CNT:
1016                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
1017                 break;
1018         case XFS_BTNUM_INO:
1019                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
1020                 break;
1021         case XFS_BTNUM_BMAP:
1022                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1023                 break;
1024         default:
1025                 ASSERT(0);
1026         }
1027 }
1028
1029 /*
1030  * Read in the buffer at the given ptr and return the buffer and
1031  * the block pointer within the buffer.
1032  */
1033 STATIC int
1034 xfs_btree_read_buf_block(
1035         struct xfs_btree_cur    *cur,
1036         union xfs_btree_ptr     *ptr,
1037         int                     level,
1038         int                     flags,
1039         struct xfs_btree_block  **block,
1040         struct xfs_buf          **bpp)
1041 {
1042         struct xfs_mount        *mp = cur->bc_mp;
1043         xfs_daddr_t             d;
1044         int                     error;
1045
1046         /* need to sort out how callers deal with failures first */
1047         ASSERT(!(flags & XFS_BUF_TRYLOCK));
1048
1049         d = xfs_btree_ptr_to_daddr(cur, ptr);
1050         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1051                                    mp->m_bsize, flags, bpp);
1052         if (error)
1053                 return error;
1054
1055         ASSERT(*bpp != NULL);
1056         ASSERT(!XFS_BUF_GETERROR(*bpp));
1057
1058         xfs_btree_set_refs(cur, *bpp);
1059         *block = XFS_BUF_TO_BLOCK(*bpp);
1060
1061         error = xfs_btree_check_block(cur, *block, level, *bpp);
1062         if (error)
1063                 xfs_trans_brelse(cur->bc_tp, *bpp);
1064         return error;
1065 }
1066
1067 /*
1068  * Increment cursor by one record at the level.
1069  * For nonzero levels the leaf-ward information is untouched.
1070  */
1071 int                                             /* error */
1072 xfs_btree_increment(
1073         struct xfs_btree_cur    *cur,
1074         int                     level,
1075         int                     *stat)          /* success/failure */
1076 {
1077         struct xfs_btree_block  *block;
1078         union xfs_btree_ptr     ptr;
1079         struct xfs_buf          *bp;
1080         int                     error;          /* error return value */
1081         int                     lev;
1082
1083         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1084         XFS_BTREE_TRACE_ARGI(cur, level);
1085
1086         ASSERT(level < cur->bc_nlevels);
1087
1088         /* Read-ahead to the right at this level. */
1089         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1090
1091         /* Get a pointer to the btree block. */
1092         block = xfs_btree_get_block(cur, level, &bp);
1093
1094 #ifdef DEBUG
1095         error = xfs_btree_check_block(cur, block, level, bp);
1096         if (error)
1097                 goto error0;
1098 #endif
1099
1100         /* We're done if we remain in the block after the increment. */
1101         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1102                 goto out1;
1103
1104         /* Fail if we just went off the right edge of the tree. */
1105         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1106         if (xfs_btree_ptr_is_null(cur, &ptr))
1107                 goto out0;
1108
1109         XFS_BTREE_STATS_INC(cur, increment);
1110
1111         /*
1112          * March up the tree incrementing pointers.
1113          * Stop when we don't go off the right edge of a block.
1114          */
1115         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1116                 block = xfs_btree_get_block(cur, lev, &bp);
1117
1118 #ifdef DEBUG
1119                 error = xfs_btree_check_block(cur, block, lev, bp);
1120                 if (error)
1121                         goto error0;
1122 #endif
1123
1124                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1125                         break;
1126
1127                 /* Read-ahead the right block for the next loop. */
1128                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1129         }
1130
1131         /*
1132          * If we went off the root then we are either seriously
1133          * confused or have the tree root in an inode.
1134          */
1135         if (lev == cur->bc_nlevels) {
1136                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1137                         goto out0;
1138                 ASSERT(0);
1139                 error = EFSCORRUPTED;
1140                 goto error0;
1141         }
1142         ASSERT(lev < cur->bc_nlevels);
1143
1144         /*
1145          * Now walk back down the tree, fixing up the cursor's buffer
1146          * pointers and key numbers.
1147          */
1148         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1149                 union xfs_btree_ptr     *ptrp;
1150
1151                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1152                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1153                                                         0, &block, &bp);
1154                 if (error)
1155                         goto error0;
1156
1157                 xfs_btree_setbuf(cur, lev, bp);
1158                 cur->bc_ptrs[lev] = 1;
1159         }
1160 out1:
1161         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1162         *stat = 1;
1163         return 0;
1164
1165 out0:
1166         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1167         *stat = 0;
1168         return 0;
1169
1170 error0:
1171         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1172         return error;
1173 }