[XFS] add helpers for addressing entities inside a btree block
[safe/jmp/linux-2.6] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_error.h"
40
41 /*
42  * Cursor allocation zone.
43  */
44 kmem_zone_t     *xfs_btree_cur_zone;
45
46 /*
47  * Btree magic numbers.
48  */
49 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
50         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
51 };
52
53 /*
54  * External routines.
55  */
56
57 #ifdef DEBUG
58 /*
59  * Debug routine: check that keys are in the right order.
60  */
61 void
62 xfs_btree_check_key(
63         xfs_btnum_t     btnum,          /* btree identifier */
64         void            *ak1,           /* pointer to left (lower) key */
65         void            *ak2)           /* pointer to right (higher) key */
66 {
67         switch (btnum) {
68         case XFS_BTNUM_BNO: {
69                 xfs_alloc_key_t *k1;
70                 xfs_alloc_key_t *k2;
71
72                 k1 = ak1;
73                 k2 = ak2;
74                 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
75                 break;
76             }
77         case XFS_BTNUM_CNT: {
78                 xfs_alloc_key_t *k1;
79                 xfs_alloc_key_t *k2;
80
81                 k1 = ak1;
82                 k2 = ak2;
83                 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
84                        (k1->ar_blockcount == k2->ar_blockcount &&
85                         be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
86                 break;
87             }
88         case XFS_BTNUM_BMAP: {
89                 xfs_bmbt_key_t  *k1;
90                 xfs_bmbt_key_t  *k2;
91
92                 k1 = ak1;
93                 k2 = ak2;
94                 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
95                 break;
96             }
97         case XFS_BTNUM_INO: {
98                 xfs_inobt_key_t *k1;
99                 xfs_inobt_key_t *k2;
100
101                 k1 = ak1;
102                 k2 = ak2;
103                 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
104                 break;
105             }
106         default:
107                 ASSERT(0);
108         }
109 }
110
111 /*
112  * Debug routine: check that records are in the right order.
113  */
114 void
115 xfs_btree_check_rec(
116         xfs_btnum_t     btnum,          /* btree identifier */
117         void            *ar1,           /* pointer to left (lower) record */
118         void            *ar2)           /* pointer to right (higher) record */
119 {
120         switch (btnum) {
121         case XFS_BTNUM_BNO: {
122                 xfs_alloc_rec_t *r1;
123                 xfs_alloc_rec_t *r2;
124
125                 r1 = ar1;
126                 r2 = ar2;
127                 ASSERT(be32_to_cpu(r1->ar_startblock) +
128                        be32_to_cpu(r1->ar_blockcount) <=
129                        be32_to_cpu(r2->ar_startblock));
130                 break;
131             }
132         case XFS_BTNUM_CNT: {
133                 xfs_alloc_rec_t *r1;
134                 xfs_alloc_rec_t *r2;
135
136                 r1 = ar1;
137                 r2 = ar2;
138                 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
139                        (r1->ar_blockcount == r2->ar_blockcount &&
140                         be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
141                 break;
142             }
143         case XFS_BTNUM_BMAP: {
144                 xfs_bmbt_rec_t  *r1;
145                 xfs_bmbt_rec_t  *r2;
146
147                 r1 = ar1;
148                 r2 = ar2;
149                 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
150                        xfs_bmbt_disk_get_blockcount(r1) <=
151                        xfs_bmbt_disk_get_startoff(r2));
152                 break;
153             }
154         case XFS_BTNUM_INO: {
155                 xfs_inobt_rec_t *r1;
156                 xfs_inobt_rec_t *r2;
157
158                 r1 = ar1;
159                 r2 = ar2;
160                 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
161                        be32_to_cpu(r2->ir_startino));
162                 break;
163             }
164         default:
165                 ASSERT(0);
166         }
167 }
168 #endif  /* DEBUG */
169
170 int                                     /* error (0 or EFSCORRUPTED) */
171 xfs_btree_check_lblock(
172         struct xfs_btree_cur    *cur,   /* btree cursor */
173         struct xfs_btree_lblock *block, /* btree long form block pointer */
174         int                     level,  /* level of the btree block */
175         struct xfs_buf          *bp)    /* buffer for block, if any */
176 {
177         int                     lblock_ok; /* block passes checks */
178         struct xfs_mount        *mp;    /* file system mount point */
179
180         mp = cur->bc_mp;
181         lblock_ok =
182                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
183                 be16_to_cpu(block->bb_level) == level &&
184                 be16_to_cpu(block->bb_numrecs) <=
185                         cur->bc_ops->get_maxrecs(cur, level) &&
186                 block->bb_leftsib &&
187                 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
188                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
189                 block->bb_rightsib &&
190                 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
191                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
192         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
193                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
194                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
195                 if (bp)
196                         xfs_buftrace("LBTREE ERROR", bp);
197                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
198                                  mp);
199                 return XFS_ERROR(EFSCORRUPTED);
200         }
201         return 0;
202 }
203
204 int                                     /* error (0 or EFSCORRUPTED) */
205 xfs_btree_check_sblock(
206         struct xfs_btree_cur    *cur,   /* btree cursor */
207         struct xfs_btree_sblock *block, /* btree short form block pointer */
208         int                     level,  /* level of the btree block */
209         struct xfs_buf          *bp)    /* buffer containing block */
210 {
211         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
212         struct xfs_agf          *agf;   /* ag. freespace structure */
213         xfs_agblock_t           agflen; /* native ag. freespace length */
214         int                     sblock_ok; /* block passes checks */
215
216         agbp = cur->bc_private.a.agbp;
217         agf = XFS_BUF_TO_AGF(agbp);
218         agflen = be32_to_cpu(agf->agf_length);
219         sblock_ok =
220                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
221                 be16_to_cpu(block->bb_level) == level &&
222                 be16_to_cpu(block->bb_numrecs) <=
223                         cur->bc_ops->get_maxrecs(cur, level) &&
224                 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
225                  be32_to_cpu(block->bb_leftsib) < agflen) &&
226                 block->bb_leftsib &&
227                 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
228                  be32_to_cpu(block->bb_rightsib) < agflen) &&
229                 block->bb_rightsib;
230         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
231                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
232                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
233                 if (bp)
234                         xfs_buftrace("SBTREE ERROR", bp);
235                 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
236                                  cur->bc_mp);
237                 return XFS_ERROR(EFSCORRUPTED);
238         }
239         return 0;
240 }
241
242 /*
243  * Debug routine: check that block header is ok.
244  */
245 int
246 xfs_btree_check_block(
247         struct xfs_btree_cur    *cur,   /* btree cursor */
248         struct xfs_btree_block  *block, /* generic btree block pointer */
249         int                     level,  /* level of the btree block */
250         struct xfs_buf          *bp)    /* buffer containing block, if any */
251 {
252         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
253                 return xfs_btree_check_lblock(cur,
254                                 (struct xfs_btree_lblock *)block, level, bp);
255         } else {
256                 return xfs_btree_check_sblock(cur,
257                                 (struct xfs_btree_sblock *)block, level, bp);
258         }
259 }
260
261 /*
262  * Check that (long) pointer is ok.
263  */
264 int                                     /* error (0 or EFSCORRUPTED) */
265 xfs_btree_check_lptr(
266         struct xfs_btree_cur    *cur,   /* btree cursor */
267         xfs_dfsbno_t            bno,    /* btree block disk address */
268         int                     level)  /* btree block level */
269 {
270         XFS_WANT_CORRUPTED_RETURN(
271                 level > 0 &&
272                 bno != NULLDFSBNO &&
273                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
274         return 0;
275 }
276
277 /*
278  * Check that (short) pointer is ok.
279  */
280 int                                     /* error (0 or EFSCORRUPTED) */
281 xfs_btree_check_sptr(
282         struct xfs_btree_cur    *cur,   /* btree cursor */
283         xfs_agblock_t           bno,    /* btree block disk address */
284         int                     level)  /* btree block level */
285 {
286         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
287
288         XFS_WANT_CORRUPTED_RETURN(
289                 level > 0 &&
290                 bno != NULLAGBLOCK &&
291                 bno != 0 &&
292                 bno < agblocks);
293         return 0;
294 }
295
296 /*
297  * Check that block ptr is ok.
298  */
299 int                                     /* error (0 or EFSCORRUPTED) */
300 xfs_btree_check_ptr(
301         struct xfs_btree_cur    *cur,   /* btree cursor */
302         union xfs_btree_ptr     *ptr,   /* btree block disk address */
303         int                     index,  /* offset from ptr to check */
304         int                     level)  /* btree block level */
305 {
306         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
307                 return xfs_btree_check_lptr(cur,
308                                 be64_to_cpu((&ptr->l)[index]), level);
309         } else {
310                 return xfs_btree_check_sptr(cur,
311                                 be32_to_cpu((&ptr->s)[index]), level);
312         }
313 }
314
315 /*
316  * Delete the btree cursor.
317  */
318 void
319 xfs_btree_del_cursor(
320         xfs_btree_cur_t *cur,           /* btree cursor */
321         int             error)          /* del because of error */
322 {
323         int             i;              /* btree level */
324
325         /*
326          * Clear the buffer pointers, and release the buffers.
327          * If we're doing this in the face of an error, we
328          * need to make sure to inspect all of the entries
329          * in the bc_bufs array for buffers to be unlocked.
330          * This is because some of the btree code works from
331          * level n down to 0, and if we get an error along
332          * the way we won't have initialized all the entries
333          * down to 0.
334          */
335         for (i = 0; i < cur->bc_nlevels; i++) {
336                 if (cur->bc_bufs[i])
337                         xfs_btree_setbuf(cur, i, NULL);
338                 else if (!error)
339                         break;
340         }
341         /*
342          * Can't free a bmap cursor without having dealt with the
343          * allocated indirect blocks' accounting.
344          */
345         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
346                cur->bc_private.b.allocated == 0);
347         /*
348          * Free the cursor.
349          */
350         kmem_zone_free(xfs_btree_cur_zone, cur);
351 }
352
353 /*
354  * Duplicate the btree cursor.
355  * Allocate a new one, copy the record, re-get the buffers.
356  */
357 int                                     /* error */
358 xfs_btree_dup_cursor(
359         xfs_btree_cur_t *cur,           /* input cursor */
360         xfs_btree_cur_t **ncur)         /* output cursor */
361 {
362         xfs_buf_t       *bp;            /* btree block's buffer pointer */
363         int             error;          /* error return value */
364         int             i;              /* level number of btree block */
365         xfs_mount_t     *mp;            /* mount structure for filesystem */
366         xfs_btree_cur_t *new;           /* new cursor value */
367         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
368
369         tp = cur->bc_tp;
370         mp = cur->bc_mp;
371
372         /*
373          * Allocate a new cursor like the old one.
374          */
375         new = cur->bc_ops->dup_cursor(cur);
376
377         /*
378          * Copy the record currently in the cursor.
379          */
380         new->bc_rec = cur->bc_rec;
381
382         /*
383          * For each level current, re-get the buffer and copy the ptr value.
384          */
385         for (i = 0; i < new->bc_nlevels; i++) {
386                 new->bc_ptrs[i] = cur->bc_ptrs[i];
387                 new->bc_ra[i] = cur->bc_ra[i];
388                 if ((bp = cur->bc_bufs[i])) {
389                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
390                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
391                                 xfs_btree_del_cursor(new, error);
392                                 *ncur = NULL;
393                                 return error;
394                         }
395                         new->bc_bufs[i] = bp;
396                         ASSERT(bp);
397                         ASSERT(!XFS_BUF_GETERROR(bp));
398                 } else
399                         new->bc_bufs[i] = NULL;
400         }
401         *ncur = new;
402         return 0;
403 }
404
405 /*
406  * XFS btree block layout and addressing:
407  *
408  * There are two types of blocks in the btree: leaf and non-leaf blocks.
409  *
410  * The leaf record start with a header then followed by records containing
411  * the values.  A non-leaf block also starts with the same header, and
412  * then first contains lookup keys followed by an equal number of pointers
413  * to the btree blocks at the previous level.
414  *
415  *              +--------+-------+-------+-------+-------+-------+-------+
416  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
417  *              +--------+-------+-------+-------+-------+-------+-------+
418  *
419  *              +--------+-------+-------+-------+-------+-------+-------+
420  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
421  *              +--------+-------+-------+-------+-------+-------+-------+
422  *
423  * The header is called struct xfs_btree_block for reasons better left unknown
424  * and comes in different versions for short (32bit) and long (64bit) block
425  * pointers.  The record and key structures are defined by the btree instances
426  * and opaque to the btree core.  The block pointers are simple disk endian
427  * integers, available in a short (32bit) and long (64bit) variant.
428  *
429  * The helpers below calculate the offset of a given record, key or pointer
430  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
431  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
432  * inside the btree block is done using indices starting at one, not zero!
433  */
434
435 /*
436  * Return size of the btree block header for this btree instance.
437  */
438 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
439 {
440         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
441                 sizeof(struct xfs_btree_lblock) :
442                 sizeof(struct xfs_btree_sblock);
443 }
444
445 /*
446  * Return size of btree block pointers for this btree instance.
447  */
448 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
449 {
450         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
451                 sizeof(__be64) : sizeof(__be32);
452 }
453
454 /*
455  * Calculate offset of the n-th record in a btree block.
456  */
457 STATIC size_t
458 xfs_btree_rec_offset(
459         struct xfs_btree_cur    *cur,
460         int                     n)
461 {
462         return xfs_btree_block_len(cur) +
463                 (n - 1) * cur->bc_ops->rec_len;
464 }
465
466 /*
467  * Calculate offset of the n-th key in a btree block.
468  */
469 STATIC size_t
470 xfs_btree_key_offset(
471         struct xfs_btree_cur    *cur,
472         int                     n)
473 {
474         return xfs_btree_block_len(cur) +
475                 (n - 1) * cur->bc_ops->key_len;
476 }
477
478 /*
479  * Calculate offset of the n-th block pointer in a btree block.
480  */
481 STATIC size_t
482 xfs_btree_ptr_offset(
483         struct xfs_btree_cur    *cur,
484         int                     n,
485         int                     level)
486 {
487         return xfs_btree_block_len(cur) +
488                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
489                 (n - 1) * xfs_btree_ptr_len(cur);
490 }
491
492 /*
493  * Return a pointer to the n-th record in the btree block.
494  */
495 STATIC union xfs_btree_rec *
496 xfs_btree_rec_addr(
497         struct xfs_btree_cur    *cur,
498         int                     n,
499         struct xfs_btree_block  *block)
500 {
501         return (union xfs_btree_rec *)
502                 ((char *)block + xfs_btree_rec_offset(cur, n));
503 }
504
505 /*
506  * Return a pointer to the n-th key in the btree block.
507  */
508 STATIC union xfs_btree_key *
509 xfs_btree_key_addr(
510         struct xfs_btree_cur    *cur,
511         int                     n,
512         struct xfs_btree_block  *block)
513 {
514         return (union xfs_btree_key *)
515                 ((char *)block + xfs_btree_key_offset(cur, n));
516 }
517
518 /*
519  * Return a pointer to the n-th block pointer in the btree block.
520  */
521 STATIC union xfs_btree_ptr *
522 xfs_btree_ptr_addr(
523         struct xfs_btree_cur    *cur,
524         int                     n,
525         struct xfs_btree_block  *block)
526 {
527         int                     level = xfs_btree_get_level(block);
528
529         ASSERT(block->bb_level != 0);
530
531         return (union xfs_btree_ptr *)
532                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
533 }
534
535 /*
536  * Get a the root block which is stored in the inode.
537  *
538  * For now this btree implementation assumes the btree root is always
539  * stored in the if_broot field of an inode fork.
540  */
541 STATIC struct xfs_btree_block *
542 xfs_btree_get_iroot(
543        struct xfs_btree_cur    *cur)
544 {
545        struct xfs_ifork        *ifp;
546
547        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
548        return (struct xfs_btree_block *)ifp->if_broot;
549 }
550
551 /*
552  * Retrieve the block pointer from the cursor at the given level.
553  * This may be an inode btree root or from a buffer.
554  */
555 STATIC struct xfs_btree_block *         /* generic btree block pointer */
556 xfs_btree_get_block(
557         struct xfs_btree_cur    *cur,   /* btree cursor */
558         int                     level,  /* level in btree */
559         struct xfs_buf          **bpp)  /* buffer containing the block */
560 {
561         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
562             (level == cur->bc_nlevels - 1)) {
563                 *bpp = NULL;
564                 return xfs_btree_get_iroot(cur);
565         }
566
567         *bpp = cur->bc_bufs[level];
568         return XFS_BUF_TO_BLOCK(*bpp);
569 }
570
571 /*
572  * Get a buffer for the block, return it with no data read.
573  * Long-form addressing.
574  */
575 xfs_buf_t *                             /* buffer for fsbno */
576 xfs_btree_get_bufl(
577         xfs_mount_t     *mp,            /* file system mount point */
578         xfs_trans_t     *tp,            /* transaction pointer */
579         xfs_fsblock_t   fsbno,          /* file system block number */
580         uint            lock)           /* lock flags for get_buf */
581 {
582         xfs_buf_t       *bp;            /* buffer pointer (return value) */
583         xfs_daddr_t             d;              /* real disk block address */
584
585         ASSERT(fsbno != NULLFSBLOCK);
586         d = XFS_FSB_TO_DADDR(mp, fsbno);
587         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
588         ASSERT(bp);
589         ASSERT(!XFS_BUF_GETERROR(bp));
590         return bp;
591 }
592
593 /*
594  * Get a buffer for the block, return it with no data read.
595  * Short-form addressing.
596  */
597 xfs_buf_t *                             /* buffer for agno/agbno */
598 xfs_btree_get_bufs(
599         xfs_mount_t     *mp,            /* file system mount point */
600         xfs_trans_t     *tp,            /* transaction pointer */
601         xfs_agnumber_t  agno,           /* allocation group number */
602         xfs_agblock_t   agbno,          /* allocation group block number */
603         uint            lock)           /* lock flags for get_buf */
604 {
605         xfs_buf_t       *bp;            /* buffer pointer (return value) */
606         xfs_daddr_t             d;              /* real disk block address */
607
608         ASSERT(agno != NULLAGNUMBER);
609         ASSERT(agbno != NULLAGBLOCK);
610         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
611         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
612         ASSERT(bp);
613         ASSERT(!XFS_BUF_GETERROR(bp));
614         return bp;
615 }
616
617 /*
618  * Check for the cursor referring to the last block at the given level.
619  */
620 int                                     /* 1=is last block, 0=not last block */
621 xfs_btree_islastblock(
622         xfs_btree_cur_t         *cur,   /* btree cursor */
623         int                     level)  /* level to check */
624 {
625         xfs_btree_block_t       *block; /* generic btree block pointer */
626         xfs_buf_t               *bp;    /* buffer containing block */
627
628         block = xfs_btree_get_block(cur, level, &bp);
629         xfs_btree_check_block(cur, block, level, bp);
630         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
631                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
632         else
633                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
634 }
635
636 /*
637  * Change the cursor to point to the first record at the given level.
638  * Other levels are unaffected.
639  */
640 int                                     /* success=1, failure=0 */
641 xfs_btree_firstrec(
642         xfs_btree_cur_t         *cur,   /* btree cursor */
643         int                     level)  /* level to change */
644 {
645         xfs_btree_block_t       *block; /* generic btree block pointer */
646         xfs_buf_t               *bp;    /* buffer containing block */
647
648         /*
649          * Get the block pointer for this level.
650          */
651         block = xfs_btree_get_block(cur, level, &bp);
652         xfs_btree_check_block(cur, block, level, bp);
653         /*
654          * It's empty, there is no such record.
655          */
656         if (!block->bb_numrecs)
657                 return 0;
658         /*
659          * Set the ptr value to 1, that's the first record/key.
660          */
661         cur->bc_ptrs[level] = 1;
662         return 1;
663 }
664
665 /*
666  * Change the cursor to point to the last record in the current block
667  * at the given level.  Other levels are unaffected.
668  */
669 int                                     /* success=1, failure=0 */
670 xfs_btree_lastrec(
671         xfs_btree_cur_t         *cur,   /* btree cursor */
672         int                     level)  /* level to change */
673 {
674         xfs_btree_block_t       *block; /* generic btree block pointer */
675         xfs_buf_t               *bp;    /* buffer containing block */
676
677         /*
678          * Get the block pointer for this level.
679          */
680         block = xfs_btree_get_block(cur, level, &bp);
681         xfs_btree_check_block(cur, block, level, bp);
682         /*
683          * It's empty, there is no such record.
684          */
685         if (!block->bb_numrecs)
686                 return 0;
687         /*
688          * Set the ptr value to numrecs, that's the last record/key.
689          */
690         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
691         return 1;
692 }
693
694 /*
695  * Compute first and last byte offsets for the fields given.
696  * Interprets the offsets table, which contains struct field offsets.
697  */
698 void
699 xfs_btree_offsets(
700         __int64_t       fields,         /* bitmask of fields */
701         const short     *offsets,       /* table of field offsets */
702         int             nbits,          /* number of bits to inspect */
703         int             *first,         /* output: first byte offset */
704         int             *last)          /* output: last byte offset */
705 {
706         int             i;              /* current bit number */
707         __int64_t       imask;          /* mask for current bit number */
708
709         ASSERT(fields != 0);
710         /*
711          * Find the lowest bit, so the first byte offset.
712          */
713         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
714                 if (imask & fields) {
715                         *first = offsets[i];
716                         break;
717                 }
718         }
719         /*
720          * Find the highest bit, so the last byte offset.
721          */
722         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
723                 if (imask & fields) {
724                         *last = offsets[i + 1] - 1;
725                         break;
726                 }
727         }
728 }
729
730 /*
731  * Get a buffer for the block, return it read in.
732  * Long-form addressing.
733  */
734 int                                     /* error */
735 xfs_btree_read_bufl(
736         xfs_mount_t     *mp,            /* file system mount point */
737         xfs_trans_t     *tp,            /* transaction pointer */
738         xfs_fsblock_t   fsbno,          /* file system block number */
739         uint            lock,           /* lock flags for read_buf */
740         xfs_buf_t       **bpp,          /* buffer for fsbno */
741         int             refval)         /* ref count value for buffer */
742 {
743         xfs_buf_t       *bp;            /* return value */
744         xfs_daddr_t             d;              /* real disk block address */
745         int             error;
746
747         ASSERT(fsbno != NULLFSBLOCK);
748         d = XFS_FSB_TO_DADDR(mp, fsbno);
749         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
750                         mp->m_bsize, lock, &bp))) {
751                 return error;
752         }
753         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
754         if (bp != NULL) {
755                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
756         }
757         *bpp = bp;
758         return 0;
759 }
760
761 /*
762  * Get a buffer for the block, return it read in.
763  * Short-form addressing.
764  */
765 int                                     /* error */
766 xfs_btree_read_bufs(
767         xfs_mount_t     *mp,            /* file system mount point */
768         xfs_trans_t     *tp,            /* transaction pointer */
769         xfs_agnumber_t  agno,           /* allocation group number */
770         xfs_agblock_t   agbno,          /* allocation group block number */
771         uint            lock,           /* lock flags for read_buf */
772         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
773         int             refval)         /* ref count value for buffer */
774 {
775         xfs_buf_t       *bp;            /* return value */
776         xfs_daddr_t     d;              /* real disk block address */
777         int             error;
778
779         ASSERT(agno != NULLAGNUMBER);
780         ASSERT(agbno != NULLAGBLOCK);
781         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
782         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
783                                         mp->m_bsize, lock, &bp))) {
784                 return error;
785         }
786         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
787         if (bp != NULL) {
788                 switch (refval) {
789                 case XFS_ALLOC_BTREE_REF:
790                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
791                         break;
792                 case XFS_INO_BTREE_REF:
793                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
794                         break;
795                 }
796         }
797         *bpp = bp;
798         return 0;
799 }
800
801 /*
802  * Read-ahead the block, don't wait for it, don't return a buffer.
803  * Long-form addressing.
804  */
805 /* ARGSUSED */
806 void
807 xfs_btree_reada_bufl(
808         xfs_mount_t     *mp,            /* file system mount point */
809         xfs_fsblock_t   fsbno,          /* file system block number */
810         xfs_extlen_t    count)          /* count of filesystem blocks */
811 {
812         xfs_daddr_t             d;
813
814         ASSERT(fsbno != NULLFSBLOCK);
815         d = XFS_FSB_TO_DADDR(mp, fsbno);
816         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
817 }
818
819 /*
820  * Read-ahead the block, don't wait for it, don't return a buffer.
821  * Short-form addressing.
822  */
823 /* ARGSUSED */
824 void
825 xfs_btree_reada_bufs(
826         xfs_mount_t     *mp,            /* file system mount point */
827         xfs_agnumber_t  agno,           /* allocation group number */
828         xfs_agblock_t   agbno,          /* allocation group block number */
829         xfs_extlen_t    count)          /* count of filesystem blocks */
830 {
831         xfs_daddr_t             d;
832
833         ASSERT(agno != NULLAGNUMBER);
834         ASSERT(agbno != NULLAGBLOCK);
835         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
836         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
837 }
838
839 STATIC int
840 xfs_btree_readahead_lblock(
841         struct xfs_btree_cur    *cur,
842         int                     lr,
843         struct xfs_btree_block  *block)
844 {
845         int                     rval = 0;
846         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
847         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
848
849         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
850                 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
851                 rval++;
852         }
853
854         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
855                 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
856                 rval++;
857         }
858
859         return rval;
860 }
861
862 STATIC int
863 xfs_btree_readahead_sblock(
864         struct xfs_btree_cur    *cur,
865         int                     lr,
866         struct xfs_btree_block *block)
867 {
868         int                     rval = 0;
869         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
870         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
871
872
873         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
874                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
875                                      left, 1);
876                 rval++;
877         }
878
879         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
880                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
881                                      right, 1);
882                 rval++;
883         }
884
885         return rval;
886 }
887
888 /*
889  * Read-ahead btree blocks, at the given level.
890  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
891  */
892 int
893 xfs_btree_readahead(
894         struct xfs_btree_cur    *cur,           /* btree cursor */
895         int                     lev,            /* level in btree */
896         int                     lr)             /* left/right bits */
897 {
898         struct xfs_btree_block  *block;
899
900         /*
901          * No readahead needed if we are at the root level and the
902          * btree root is stored in the inode.
903          */
904         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
905             (lev == cur->bc_nlevels - 1))
906                 return 0;
907
908         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
909                 return 0;
910
911         cur->bc_ra[lev] |= lr;
912         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
913
914         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
915                 return xfs_btree_readahead_lblock(cur, lr, block);
916         return xfs_btree_readahead_sblock(cur, lr, block);
917 }
918
919 /*
920  * Set the buffer for level "lev" in the cursor to bp, releasing
921  * any previous buffer.
922  */
923 void
924 xfs_btree_setbuf(
925         xfs_btree_cur_t         *cur,   /* btree cursor */
926         int                     lev,    /* level in btree */
927         xfs_buf_t               *bp)    /* new buffer to set */
928 {
929         xfs_btree_block_t       *b;     /* btree block */
930         xfs_buf_t               *obp;   /* old buffer pointer */
931
932         obp = cur->bc_bufs[lev];
933         if (obp)
934                 xfs_trans_brelse(cur->bc_tp, obp);
935         cur->bc_bufs[lev] = bp;
936         cur->bc_ra[lev] = 0;
937         if (!bp)
938                 return;
939         b = XFS_BUF_TO_BLOCK(bp);
940         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
941                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
942                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
943                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
944                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
945         } else {
946                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
947                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
948                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
949                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
950         }
951 }