[XFS] refactor btree validation helpers
[safe/jmp/linux-2.6] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_error.h"
40
41 /*
42  * Cursor allocation zone.
43  */
44 kmem_zone_t     *xfs_btree_cur_zone;
45
46 /*
47  * Btree magic numbers.
48  */
49 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
50         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
51 };
52
53 /*
54  * Checking routine: return maxrecs for the block.
55  */
56 STATIC int                              /* number of records fitting in block */
57 xfs_btree_maxrecs(
58         xfs_btree_cur_t         *cur,   /* btree cursor */
59         xfs_btree_block_t       *block) /* generic btree block pointer */
60 {
61         switch (cur->bc_btnum) {
62         case XFS_BTNUM_BNO:
63         case XFS_BTNUM_CNT:
64                 return (int)XFS_ALLOC_BLOCK_MAXRECS(
65                                 be16_to_cpu(block->bb_level), cur);
66         case XFS_BTNUM_BMAP:
67                 return (int)XFS_BMAP_BLOCK_IMAXRECS(
68                                 be16_to_cpu(block->bb_level), cur);
69         case XFS_BTNUM_INO:
70                 return (int)XFS_INOBT_BLOCK_MAXRECS(
71                                 be16_to_cpu(block->bb_level), cur);
72         default:
73                 ASSERT(0);
74                 return 0;
75         }
76 }
77
78 /*
79  * External routines.
80  */
81
82 #ifdef DEBUG
83 /*
84  * Debug routine: check that keys are in the right order.
85  */
86 void
87 xfs_btree_check_key(
88         xfs_btnum_t     btnum,          /* btree identifier */
89         void            *ak1,           /* pointer to left (lower) key */
90         void            *ak2)           /* pointer to right (higher) key */
91 {
92         switch (btnum) {
93         case XFS_BTNUM_BNO: {
94                 xfs_alloc_key_t *k1;
95                 xfs_alloc_key_t *k2;
96
97                 k1 = ak1;
98                 k2 = ak2;
99                 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
100                 break;
101             }
102         case XFS_BTNUM_CNT: {
103                 xfs_alloc_key_t *k1;
104                 xfs_alloc_key_t *k2;
105
106                 k1 = ak1;
107                 k2 = ak2;
108                 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
109                        (k1->ar_blockcount == k2->ar_blockcount &&
110                         be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
111                 break;
112             }
113         case XFS_BTNUM_BMAP: {
114                 xfs_bmbt_key_t  *k1;
115                 xfs_bmbt_key_t  *k2;
116
117                 k1 = ak1;
118                 k2 = ak2;
119                 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
120                 break;
121             }
122         case XFS_BTNUM_INO: {
123                 xfs_inobt_key_t *k1;
124                 xfs_inobt_key_t *k2;
125
126                 k1 = ak1;
127                 k2 = ak2;
128                 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
129                 break;
130             }
131         default:
132                 ASSERT(0);
133         }
134 }
135
136 /*
137  * Debug routine: check that records are in the right order.
138  */
139 void
140 xfs_btree_check_rec(
141         xfs_btnum_t     btnum,          /* btree identifier */
142         void            *ar1,           /* pointer to left (lower) record */
143         void            *ar2)           /* pointer to right (higher) record */
144 {
145         switch (btnum) {
146         case XFS_BTNUM_BNO: {
147                 xfs_alloc_rec_t *r1;
148                 xfs_alloc_rec_t *r2;
149
150                 r1 = ar1;
151                 r2 = ar2;
152                 ASSERT(be32_to_cpu(r1->ar_startblock) +
153                        be32_to_cpu(r1->ar_blockcount) <=
154                        be32_to_cpu(r2->ar_startblock));
155                 break;
156             }
157         case XFS_BTNUM_CNT: {
158                 xfs_alloc_rec_t *r1;
159                 xfs_alloc_rec_t *r2;
160
161                 r1 = ar1;
162                 r2 = ar2;
163                 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
164                        (r1->ar_blockcount == r2->ar_blockcount &&
165                         be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
166                 break;
167             }
168         case XFS_BTNUM_BMAP: {
169                 xfs_bmbt_rec_t  *r1;
170                 xfs_bmbt_rec_t  *r2;
171
172                 r1 = ar1;
173                 r2 = ar2;
174                 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
175                        xfs_bmbt_disk_get_blockcount(r1) <=
176                        xfs_bmbt_disk_get_startoff(r2));
177                 break;
178             }
179         case XFS_BTNUM_INO: {
180                 xfs_inobt_rec_t *r1;
181                 xfs_inobt_rec_t *r2;
182
183                 r1 = ar1;
184                 r2 = ar2;
185                 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
186                        be32_to_cpu(r2->ir_startino));
187                 break;
188             }
189         default:
190                 ASSERT(0);
191         }
192 }
193 #endif  /* DEBUG */
194
195 int                                     /* error (0 or EFSCORRUPTED) */
196 xfs_btree_check_lblock(
197         struct xfs_btree_cur    *cur,   /* btree cursor */
198         struct xfs_btree_lblock *block, /* btree long form block pointer */
199         int                     level,  /* level of the btree block */
200         struct xfs_buf          *bp)    /* buffer for block, if any */
201 {
202         int                     lblock_ok; /* block passes checks */
203         struct xfs_mount        *mp;    /* file system mount point */
204
205         mp = cur->bc_mp;
206         lblock_ok =
207                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
208                 be16_to_cpu(block->bb_level) == level &&
209                 be16_to_cpu(block->bb_numrecs) <=
210                         xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
211                 block->bb_leftsib &&
212                 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
213                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
214                 block->bb_rightsib &&
215                 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
216                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
217         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
218                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
219                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
220                 if (bp)
221                         xfs_buftrace("LBTREE ERROR", bp);
222                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
223                                  mp);
224                 return XFS_ERROR(EFSCORRUPTED);
225         }
226         return 0;
227 }
228
229 int                                     /* error (0 or EFSCORRUPTED) */
230 xfs_btree_check_sblock(
231         struct xfs_btree_cur    *cur,   /* btree cursor */
232         struct xfs_btree_sblock *block, /* btree short form block pointer */
233         int                     level,  /* level of the btree block */
234         struct xfs_buf          *bp)    /* buffer containing block */
235 {
236         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
237         struct xfs_agf          *agf;   /* ag. freespace structure */
238         xfs_agblock_t           agflen; /* native ag. freespace length */
239         int                     sblock_ok; /* block passes checks */
240
241         agbp = cur->bc_private.a.agbp;
242         agf = XFS_BUF_TO_AGF(agbp);
243         agflen = be32_to_cpu(agf->agf_length);
244         sblock_ok =
245                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
246                 be16_to_cpu(block->bb_level) == level &&
247                 be16_to_cpu(block->bb_numrecs) <=
248                         xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
249                 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
250                  be32_to_cpu(block->bb_leftsib) < agflen) &&
251                 block->bb_leftsib &&
252                 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
253                  be32_to_cpu(block->bb_rightsib) < agflen) &&
254                 block->bb_rightsib;
255         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
256                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
257                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
258                 if (bp)
259                         xfs_buftrace("SBTREE ERROR", bp);
260                 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
261                                  cur->bc_mp);
262                 return XFS_ERROR(EFSCORRUPTED);
263         }
264         return 0;
265 }
266
267 /*
268  * Debug routine: check that block header is ok.
269  */
270 int
271 xfs_btree_check_block(
272         struct xfs_btree_cur    *cur,   /* btree cursor */
273         struct xfs_btree_block  *block, /* generic btree block pointer */
274         int                     level,  /* level of the btree block */
275         struct xfs_buf          *bp)    /* buffer containing block, if any */
276 {
277         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
278                 return xfs_btree_check_lblock(cur,
279                                 (struct xfs_btree_lblock *)block, level, bp);
280         } else {
281                 return xfs_btree_check_sblock(cur,
282                                 (struct xfs_btree_sblock *)block, level, bp);
283         }
284 }
285
286 /*
287  * Check that (long) pointer is ok.
288  */
289 int                                     /* error (0 or EFSCORRUPTED) */
290 xfs_btree_check_lptr(
291         struct xfs_btree_cur    *cur,   /* btree cursor */
292         xfs_dfsbno_t            bno,    /* btree block disk address */
293         int                     level)  /* btree block level */
294 {
295         XFS_WANT_CORRUPTED_RETURN(
296                 level > 0 &&
297                 bno != NULLDFSBNO &&
298                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
299         return 0;
300 }
301
302 /*
303  * Check that (short) pointer is ok.
304  */
305 int                                     /* error (0 or EFSCORRUPTED) */
306 xfs_btree_check_sptr(
307         struct xfs_btree_cur    *cur,   /* btree cursor */
308         xfs_agblock_t           bno,    /* btree block disk address */
309         int                     level)  /* btree block level */
310 {
311         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
312
313         XFS_WANT_CORRUPTED_RETURN(
314                 level > 0 &&
315                 bno != NULLAGBLOCK &&
316                 bno != 0 &&
317                 bno < agblocks);
318         return 0;
319 }
320
321 /*
322  * Check that block ptr is ok.
323  */
324 int                                     /* error (0 or EFSCORRUPTED) */
325 xfs_btree_check_ptr(
326         struct xfs_btree_cur    *cur,   /* btree cursor */
327         union xfs_btree_ptr     *ptr,   /* btree block disk address */
328         int                     index,  /* offset from ptr to check */
329         int                     level)  /* btree block level */
330 {
331         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
332                 return xfs_btree_check_lptr(cur,
333                                 be64_to_cpu((&ptr->l)[index]), level);
334         } else {
335                 return xfs_btree_check_sptr(cur,
336                                 be32_to_cpu((&ptr->s)[index]), level);
337         }
338 }
339
340 /*
341  * Delete the btree cursor.
342  */
343 void
344 xfs_btree_del_cursor(
345         xfs_btree_cur_t *cur,           /* btree cursor */
346         int             error)          /* del because of error */
347 {
348         int             i;              /* btree level */
349
350         /*
351          * Clear the buffer pointers, and release the buffers.
352          * If we're doing this in the face of an error, we
353          * need to make sure to inspect all of the entries
354          * in the bc_bufs array for buffers to be unlocked.
355          * This is because some of the btree code works from
356          * level n down to 0, and if we get an error along
357          * the way we won't have initialized all the entries
358          * down to 0.
359          */
360         for (i = 0; i < cur->bc_nlevels; i++) {
361                 if (cur->bc_bufs[i])
362                         xfs_btree_setbuf(cur, i, NULL);
363                 else if (!error)
364                         break;
365         }
366         /*
367          * Can't free a bmap cursor without having dealt with the
368          * allocated indirect blocks' accounting.
369          */
370         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
371                cur->bc_private.b.allocated == 0);
372         /*
373          * Free the cursor.
374          */
375         kmem_zone_free(xfs_btree_cur_zone, cur);
376 }
377
378 /*
379  * Duplicate the btree cursor.
380  * Allocate a new one, copy the record, re-get the buffers.
381  */
382 int                                     /* error */
383 xfs_btree_dup_cursor(
384         xfs_btree_cur_t *cur,           /* input cursor */
385         xfs_btree_cur_t **ncur)         /* output cursor */
386 {
387         xfs_buf_t       *bp;            /* btree block's buffer pointer */
388         int             error;          /* error return value */
389         int             i;              /* level number of btree block */
390         xfs_mount_t     *mp;            /* mount structure for filesystem */
391         xfs_btree_cur_t *new;           /* new cursor value */
392         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
393
394         tp = cur->bc_tp;
395         mp = cur->bc_mp;
396
397         /*
398          * Allocate a new cursor like the old one.
399          */
400         new = cur->bc_ops->dup_cursor(cur);
401
402         /*
403          * Copy the record currently in the cursor.
404          */
405         new->bc_rec = cur->bc_rec;
406
407         /*
408          * For each level current, re-get the buffer and copy the ptr value.
409          */
410         for (i = 0; i < new->bc_nlevels; i++) {
411                 new->bc_ptrs[i] = cur->bc_ptrs[i];
412                 new->bc_ra[i] = cur->bc_ra[i];
413                 if ((bp = cur->bc_bufs[i])) {
414                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
415                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
416                                 xfs_btree_del_cursor(new, error);
417                                 *ncur = NULL;
418                                 return error;
419                         }
420                         new->bc_bufs[i] = bp;
421                         ASSERT(bp);
422                         ASSERT(!XFS_BUF_GETERROR(bp));
423                 } else
424                         new->bc_bufs[i] = NULL;
425         }
426         *ncur = new;
427         return 0;
428 }
429
430 /*
431  * Get a the root block which is stored in the inode.
432  *
433  * For now this btree implementation assumes the btree root is always
434  * stored in the if_broot field of an inode fork.
435  */
436 STATIC struct xfs_btree_block *
437 xfs_btree_get_iroot(
438        struct xfs_btree_cur    *cur)
439 {
440        struct xfs_ifork        *ifp;
441
442        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
443        return (struct xfs_btree_block *)ifp->if_broot;
444 }
445
446 /*
447  * Retrieve the block pointer from the cursor at the given level.
448  * This may be an inode btree root or from a buffer.
449  */
450 STATIC struct xfs_btree_block *         /* generic btree block pointer */
451 xfs_btree_get_block(
452         struct xfs_btree_cur    *cur,   /* btree cursor */
453         int                     level,  /* level in btree */
454         struct xfs_buf          **bpp)  /* buffer containing the block */
455 {
456         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
457             (level == cur->bc_nlevels - 1)) {
458                 *bpp = NULL;
459                 return xfs_btree_get_iroot(cur);
460         }
461
462         *bpp = cur->bc_bufs[level];
463         return XFS_BUF_TO_BLOCK(*bpp);
464 }
465
466 /*
467  * Get a buffer for the block, return it with no data read.
468  * Long-form addressing.
469  */
470 xfs_buf_t *                             /* buffer for fsbno */
471 xfs_btree_get_bufl(
472         xfs_mount_t     *mp,            /* file system mount point */
473         xfs_trans_t     *tp,            /* transaction pointer */
474         xfs_fsblock_t   fsbno,          /* file system block number */
475         uint            lock)           /* lock flags for get_buf */
476 {
477         xfs_buf_t       *bp;            /* buffer pointer (return value) */
478         xfs_daddr_t             d;              /* real disk block address */
479
480         ASSERT(fsbno != NULLFSBLOCK);
481         d = XFS_FSB_TO_DADDR(mp, fsbno);
482         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
483         ASSERT(bp);
484         ASSERT(!XFS_BUF_GETERROR(bp));
485         return bp;
486 }
487
488 /*
489  * Get a buffer for the block, return it with no data read.
490  * Short-form addressing.
491  */
492 xfs_buf_t *                             /* buffer for agno/agbno */
493 xfs_btree_get_bufs(
494         xfs_mount_t     *mp,            /* file system mount point */
495         xfs_trans_t     *tp,            /* transaction pointer */
496         xfs_agnumber_t  agno,           /* allocation group number */
497         xfs_agblock_t   agbno,          /* allocation group block number */
498         uint            lock)           /* lock flags for get_buf */
499 {
500         xfs_buf_t       *bp;            /* buffer pointer (return value) */
501         xfs_daddr_t             d;              /* real disk block address */
502
503         ASSERT(agno != NULLAGNUMBER);
504         ASSERT(agbno != NULLAGBLOCK);
505         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
506         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
507         ASSERT(bp);
508         ASSERT(!XFS_BUF_GETERROR(bp));
509         return bp;
510 }
511
512 /*
513  * Check for the cursor referring to the last block at the given level.
514  */
515 int                                     /* 1=is last block, 0=not last block */
516 xfs_btree_islastblock(
517         xfs_btree_cur_t         *cur,   /* btree cursor */
518         int                     level)  /* level to check */
519 {
520         xfs_btree_block_t       *block; /* generic btree block pointer */
521         xfs_buf_t               *bp;    /* buffer containing block */
522
523         block = xfs_btree_get_block(cur, level, &bp);
524         xfs_btree_check_block(cur, block, level, bp);
525         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
526                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
527         else
528                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
529 }
530
531 /*
532  * Change the cursor to point to the first record at the given level.
533  * Other levels are unaffected.
534  */
535 int                                     /* success=1, failure=0 */
536 xfs_btree_firstrec(
537         xfs_btree_cur_t         *cur,   /* btree cursor */
538         int                     level)  /* level to change */
539 {
540         xfs_btree_block_t       *block; /* generic btree block pointer */
541         xfs_buf_t               *bp;    /* buffer containing block */
542
543         /*
544          * Get the block pointer for this level.
545          */
546         block = xfs_btree_get_block(cur, level, &bp);
547         xfs_btree_check_block(cur, block, level, bp);
548         /*
549          * It's empty, there is no such record.
550          */
551         if (!block->bb_numrecs)
552                 return 0;
553         /*
554          * Set the ptr value to 1, that's the first record/key.
555          */
556         cur->bc_ptrs[level] = 1;
557         return 1;
558 }
559
560 /*
561  * Change the cursor to point to the last record in the current block
562  * at the given level.  Other levels are unaffected.
563  */
564 int                                     /* success=1, failure=0 */
565 xfs_btree_lastrec(
566         xfs_btree_cur_t         *cur,   /* btree cursor */
567         int                     level)  /* level to change */
568 {
569         xfs_btree_block_t       *block; /* generic btree block pointer */
570         xfs_buf_t               *bp;    /* buffer containing block */
571
572         /*
573          * Get the block pointer for this level.
574          */
575         block = xfs_btree_get_block(cur, level, &bp);
576         xfs_btree_check_block(cur, block, level, bp);
577         /*
578          * It's empty, there is no such record.
579          */
580         if (!block->bb_numrecs)
581                 return 0;
582         /*
583          * Set the ptr value to numrecs, that's the last record/key.
584          */
585         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
586         return 1;
587 }
588
589 /*
590  * Compute first and last byte offsets for the fields given.
591  * Interprets the offsets table, which contains struct field offsets.
592  */
593 void
594 xfs_btree_offsets(
595         __int64_t       fields,         /* bitmask of fields */
596         const short     *offsets,       /* table of field offsets */
597         int             nbits,          /* number of bits to inspect */
598         int             *first,         /* output: first byte offset */
599         int             *last)          /* output: last byte offset */
600 {
601         int             i;              /* current bit number */
602         __int64_t       imask;          /* mask for current bit number */
603
604         ASSERT(fields != 0);
605         /*
606          * Find the lowest bit, so the first byte offset.
607          */
608         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
609                 if (imask & fields) {
610                         *first = offsets[i];
611                         break;
612                 }
613         }
614         /*
615          * Find the highest bit, so the last byte offset.
616          */
617         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
618                 if (imask & fields) {
619                         *last = offsets[i + 1] - 1;
620                         break;
621                 }
622         }
623 }
624
625 /*
626  * Get a buffer for the block, return it read in.
627  * Long-form addressing.
628  */
629 int                                     /* error */
630 xfs_btree_read_bufl(
631         xfs_mount_t     *mp,            /* file system mount point */
632         xfs_trans_t     *tp,            /* transaction pointer */
633         xfs_fsblock_t   fsbno,          /* file system block number */
634         uint            lock,           /* lock flags for read_buf */
635         xfs_buf_t       **bpp,          /* buffer for fsbno */
636         int             refval)         /* ref count value for buffer */
637 {
638         xfs_buf_t       *bp;            /* return value */
639         xfs_daddr_t             d;              /* real disk block address */
640         int             error;
641
642         ASSERT(fsbno != NULLFSBLOCK);
643         d = XFS_FSB_TO_DADDR(mp, fsbno);
644         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
645                         mp->m_bsize, lock, &bp))) {
646                 return error;
647         }
648         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
649         if (bp != NULL) {
650                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
651         }
652         *bpp = bp;
653         return 0;
654 }
655
656 /*
657  * Get a buffer for the block, return it read in.
658  * Short-form addressing.
659  */
660 int                                     /* error */
661 xfs_btree_read_bufs(
662         xfs_mount_t     *mp,            /* file system mount point */
663         xfs_trans_t     *tp,            /* transaction pointer */
664         xfs_agnumber_t  agno,           /* allocation group number */
665         xfs_agblock_t   agbno,          /* allocation group block number */
666         uint            lock,           /* lock flags for read_buf */
667         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
668         int             refval)         /* ref count value for buffer */
669 {
670         xfs_buf_t       *bp;            /* return value */
671         xfs_daddr_t     d;              /* real disk block address */
672         int             error;
673
674         ASSERT(agno != NULLAGNUMBER);
675         ASSERT(agbno != NULLAGBLOCK);
676         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
677         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
678                                         mp->m_bsize, lock, &bp))) {
679                 return error;
680         }
681         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
682         if (bp != NULL) {
683                 switch (refval) {
684                 case XFS_ALLOC_BTREE_REF:
685                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
686                         break;
687                 case XFS_INO_BTREE_REF:
688                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
689                         break;
690                 }
691         }
692         *bpp = bp;
693         return 0;
694 }
695
696 /*
697  * Read-ahead the block, don't wait for it, don't return a buffer.
698  * Long-form addressing.
699  */
700 /* ARGSUSED */
701 void
702 xfs_btree_reada_bufl(
703         xfs_mount_t     *mp,            /* file system mount point */
704         xfs_fsblock_t   fsbno,          /* file system block number */
705         xfs_extlen_t    count)          /* count of filesystem blocks */
706 {
707         xfs_daddr_t             d;
708
709         ASSERT(fsbno != NULLFSBLOCK);
710         d = XFS_FSB_TO_DADDR(mp, fsbno);
711         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
712 }
713
714 /*
715  * Read-ahead the block, don't wait for it, don't return a buffer.
716  * Short-form addressing.
717  */
718 /* ARGSUSED */
719 void
720 xfs_btree_reada_bufs(
721         xfs_mount_t     *mp,            /* file system mount point */
722         xfs_agnumber_t  agno,           /* allocation group number */
723         xfs_agblock_t   agbno,          /* allocation group block number */
724         xfs_extlen_t    count)          /* count of filesystem blocks */
725 {
726         xfs_daddr_t             d;
727
728         ASSERT(agno != NULLAGNUMBER);
729         ASSERT(agbno != NULLAGBLOCK);
730         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
731         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
732 }
733
734 STATIC int
735 xfs_btree_readahead_lblock(
736         struct xfs_btree_cur    *cur,
737         int                     lr,
738         struct xfs_btree_block  *block)
739 {
740         int                     rval = 0;
741         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
742         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
743
744         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
745                 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
746                 rval++;
747         }
748
749         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
750                 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
751                 rval++;
752         }
753
754         return rval;
755 }
756
757 STATIC int
758 xfs_btree_readahead_sblock(
759         struct xfs_btree_cur    *cur,
760         int                     lr,
761         struct xfs_btree_block *block)
762 {
763         int                     rval = 0;
764         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
765         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
766
767
768         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
769                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
770                                      left, 1);
771                 rval++;
772         }
773
774         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
775                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
776                                      right, 1);
777                 rval++;
778         }
779
780         return rval;
781 }
782
783 /*
784  * Read-ahead btree blocks, at the given level.
785  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
786  */
787 int
788 xfs_btree_readahead(
789         struct xfs_btree_cur    *cur,           /* btree cursor */
790         int                     lev,            /* level in btree */
791         int                     lr)             /* left/right bits */
792 {
793         struct xfs_btree_block  *block;
794
795         /*
796          * No readahead needed if we are at the root level and the
797          * btree root is stored in the inode.
798          */
799         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
800             (lev == cur->bc_nlevels - 1))
801                 return 0;
802
803         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
804                 return 0;
805
806         cur->bc_ra[lev] |= lr;
807         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
808
809         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
810                 return xfs_btree_readahead_lblock(cur, lr, block);
811         return xfs_btree_readahead_sblock(cur, lr, block);
812 }
813
814 /*
815  * Set the buffer for level "lev" in the cursor to bp, releasing
816  * any previous buffer.
817  */
818 void
819 xfs_btree_setbuf(
820         xfs_btree_cur_t         *cur,   /* btree cursor */
821         int                     lev,    /* level in btree */
822         xfs_buf_t               *bp)    /* new buffer to set */
823 {
824         xfs_btree_block_t       *b;     /* btree block */
825         xfs_buf_t               *obp;   /* old buffer pointer */
826
827         obp = cur->bc_bufs[lev];
828         if (obp)
829                 xfs_trans_brelse(cur->bc_tp, obp);
830         cur->bc_bufs[lev] = bp;
831         cur->bc_ra[lev] = 0;
832         if (!bp)
833                 return;
834         b = XFS_BUF_TO_BLOCK(bp);
835         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
836                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
837                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
838                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
839                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
840         } else {
841                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
842                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
843                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
844                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
845         }
846 }