2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include "xfs_types.h"
24 #include "xfs_trans.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_btree_trace.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_error.h"
43 * Cursor allocation zone.
45 kmem_zone_t *xfs_btree_cur_zone;
48 * Btree magic numbers.
50 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
51 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
60 * Debug routine: check that keys are in the right order.
64 xfs_btnum_t btnum, /* btree identifier */
65 void *ak1, /* pointer to left (lower) key */
66 void *ak2) /* pointer to right (higher) key */
75 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
84 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
85 (k1->ar_blockcount == k2->ar_blockcount &&
86 be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
89 case XFS_BTNUM_BMAP: {
95 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
104 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
113 * Debug routine: check that records are in the right order.
117 xfs_btnum_t btnum, /* btree identifier */
118 void *ar1, /* pointer to left (lower) record */
119 void *ar2) /* pointer to right (higher) record */
122 case XFS_BTNUM_BNO: {
128 ASSERT(be32_to_cpu(r1->ar_startblock) +
129 be32_to_cpu(r1->ar_blockcount) <=
130 be32_to_cpu(r2->ar_startblock));
133 case XFS_BTNUM_CNT: {
139 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
140 (r1->ar_blockcount == r2->ar_blockcount &&
141 be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
144 case XFS_BTNUM_BMAP: {
150 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
151 xfs_bmbt_disk_get_blockcount(r1) <=
152 xfs_bmbt_disk_get_startoff(r2));
155 case XFS_BTNUM_INO: {
161 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
162 be32_to_cpu(r2->ir_startino));
171 int /* error (0 or EFSCORRUPTED) */
172 xfs_btree_check_lblock(
173 struct xfs_btree_cur *cur, /* btree cursor */
174 struct xfs_btree_lblock *block, /* btree long form block pointer */
175 int level, /* level of the btree block */
176 struct xfs_buf *bp) /* buffer for block, if any */
178 int lblock_ok; /* block passes checks */
179 struct xfs_mount *mp; /* file system mount point */
183 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
184 be16_to_cpu(block->bb_level) == level &&
185 be16_to_cpu(block->bb_numrecs) <=
186 cur->bc_ops->get_maxrecs(cur, level) &&
188 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
189 XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
190 block->bb_rightsib &&
191 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
192 XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
193 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
194 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
195 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
197 xfs_buftrace("LBTREE ERROR", bp);
198 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
200 return XFS_ERROR(EFSCORRUPTED);
205 int /* error (0 or EFSCORRUPTED) */
206 xfs_btree_check_sblock(
207 struct xfs_btree_cur *cur, /* btree cursor */
208 struct xfs_btree_sblock *block, /* btree short form block pointer */
209 int level, /* level of the btree block */
210 struct xfs_buf *bp) /* buffer containing block */
212 struct xfs_buf *agbp; /* buffer for ag. freespace struct */
213 struct xfs_agf *agf; /* ag. freespace structure */
214 xfs_agblock_t agflen; /* native ag. freespace length */
215 int sblock_ok; /* block passes checks */
217 agbp = cur->bc_private.a.agbp;
218 agf = XFS_BUF_TO_AGF(agbp);
219 agflen = be32_to_cpu(agf->agf_length);
221 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
222 be16_to_cpu(block->bb_level) == level &&
223 be16_to_cpu(block->bb_numrecs) <=
224 cur->bc_ops->get_maxrecs(cur, level) &&
225 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
226 be32_to_cpu(block->bb_leftsib) < agflen) &&
228 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
229 be32_to_cpu(block->bb_rightsib) < agflen) &&
231 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
232 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
233 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
235 xfs_buftrace("SBTREE ERROR", bp);
236 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
238 return XFS_ERROR(EFSCORRUPTED);
244 * Debug routine: check that block header is ok.
247 xfs_btree_check_block(
248 struct xfs_btree_cur *cur, /* btree cursor */
249 struct xfs_btree_block *block, /* generic btree block pointer */
250 int level, /* level of the btree block */
251 struct xfs_buf *bp) /* buffer containing block, if any */
253 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
254 return xfs_btree_check_lblock(cur,
255 (struct xfs_btree_lblock *)block, level, bp);
257 return xfs_btree_check_sblock(cur,
258 (struct xfs_btree_sblock *)block, level, bp);
263 * Check that (long) pointer is ok.
265 int /* error (0 or EFSCORRUPTED) */
266 xfs_btree_check_lptr(
267 struct xfs_btree_cur *cur, /* btree cursor */
268 xfs_dfsbno_t bno, /* btree block disk address */
269 int level) /* btree block level */
271 XFS_WANT_CORRUPTED_RETURN(
274 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
279 * Check that (short) pointer is ok.
281 int /* error (0 or EFSCORRUPTED) */
282 xfs_btree_check_sptr(
283 struct xfs_btree_cur *cur, /* btree cursor */
284 xfs_agblock_t bno, /* btree block disk address */
285 int level) /* btree block level */
287 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
289 XFS_WANT_CORRUPTED_RETURN(
291 bno != NULLAGBLOCK &&
298 * Check that block ptr is ok.
300 int /* error (0 or EFSCORRUPTED) */
302 struct xfs_btree_cur *cur, /* btree cursor */
303 union xfs_btree_ptr *ptr, /* btree block disk address */
304 int index, /* offset from ptr to check */
305 int level) /* btree block level */
307 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
308 return xfs_btree_check_lptr(cur,
309 be64_to_cpu((&ptr->l)[index]), level);
311 return xfs_btree_check_sptr(cur,
312 be32_to_cpu((&ptr->s)[index]), level);
317 * Delete the btree cursor.
320 xfs_btree_del_cursor(
321 xfs_btree_cur_t *cur, /* btree cursor */
322 int error) /* del because of error */
324 int i; /* btree level */
327 * Clear the buffer pointers, and release the buffers.
328 * If we're doing this in the face of an error, we
329 * need to make sure to inspect all of the entries
330 * in the bc_bufs array for buffers to be unlocked.
331 * This is because some of the btree code works from
332 * level n down to 0, and if we get an error along
333 * the way we won't have initialized all the entries
336 for (i = 0; i < cur->bc_nlevels; i++) {
338 xfs_btree_setbuf(cur, i, NULL);
343 * Can't free a bmap cursor without having dealt with the
344 * allocated indirect blocks' accounting.
346 ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
347 cur->bc_private.b.allocated == 0);
351 kmem_zone_free(xfs_btree_cur_zone, cur);
355 * Duplicate the btree cursor.
356 * Allocate a new one, copy the record, re-get the buffers.
359 xfs_btree_dup_cursor(
360 xfs_btree_cur_t *cur, /* input cursor */
361 xfs_btree_cur_t **ncur) /* output cursor */
363 xfs_buf_t *bp; /* btree block's buffer pointer */
364 int error; /* error return value */
365 int i; /* level number of btree block */
366 xfs_mount_t *mp; /* mount structure for filesystem */
367 xfs_btree_cur_t *new; /* new cursor value */
368 xfs_trans_t *tp; /* transaction pointer, can be NULL */
374 * Allocate a new cursor like the old one.
376 new = cur->bc_ops->dup_cursor(cur);
379 * Copy the record currently in the cursor.
381 new->bc_rec = cur->bc_rec;
384 * For each level current, re-get the buffer and copy the ptr value.
386 for (i = 0; i < new->bc_nlevels; i++) {
387 new->bc_ptrs[i] = cur->bc_ptrs[i];
388 new->bc_ra[i] = cur->bc_ra[i];
389 if ((bp = cur->bc_bufs[i])) {
390 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
391 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
392 xfs_btree_del_cursor(new, error);
396 new->bc_bufs[i] = bp;
398 ASSERT(!XFS_BUF_GETERROR(bp));
400 new->bc_bufs[i] = NULL;
407 * XFS btree block layout and addressing:
409 * There are two types of blocks in the btree: leaf and non-leaf blocks.
411 * The leaf record start with a header then followed by records containing
412 * the values. A non-leaf block also starts with the same header, and
413 * then first contains lookup keys followed by an equal number of pointers
414 * to the btree blocks at the previous level.
416 * +--------+-------+-------+-------+-------+-------+-------+
417 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
418 * +--------+-------+-------+-------+-------+-------+-------+
420 * +--------+-------+-------+-------+-------+-------+-------+
421 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
422 * +--------+-------+-------+-------+-------+-------+-------+
424 * The header is called struct xfs_btree_block for reasons better left unknown
425 * and comes in different versions for short (32bit) and long (64bit) block
426 * pointers. The record and key structures are defined by the btree instances
427 * and opaque to the btree core. The block pointers are simple disk endian
428 * integers, available in a short (32bit) and long (64bit) variant.
430 * The helpers below calculate the offset of a given record, key or pointer
431 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
432 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
433 * inside the btree block is done using indices starting at one, not zero!
437 * Return size of the btree block header for this btree instance.
439 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
441 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
442 sizeof(struct xfs_btree_lblock) :
443 sizeof(struct xfs_btree_sblock);
447 * Return size of btree block pointers for this btree instance.
449 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
451 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
452 sizeof(__be64) : sizeof(__be32);
456 * Calculate offset of the n-th record in a btree block.
459 xfs_btree_rec_offset(
460 struct xfs_btree_cur *cur,
463 return xfs_btree_block_len(cur) +
464 (n - 1) * cur->bc_ops->rec_len;
468 * Calculate offset of the n-th key in a btree block.
471 xfs_btree_key_offset(
472 struct xfs_btree_cur *cur,
475 return xfs_btree_block_len(cur) +
476 (n - 1) * cur->bc_ops->key_len;
480 * Calculate offset of the n-th block pointer in a btree block.
483 xfs_btree_ptr_offset(
484 struct xfs_btree_cur *cur,
488 return xfs_btree_block_len(cur) +
489 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
490 (n - 1) * xfs_btree_ptr_len(cur);
494 * Return a pointer to the n-th record in the btree block.
496 STATIC union xfs_btree_rec *
498 struct xfs_btree_cur *cur,
500 struct xfs_btree_block *block)
502 return (union xfs_btree_rec *)
503 ((char *)block + xfs_btree_rec_offset(cur, n));
507 * Return a pointer to the n-th key in the btree block.
509 STATIC union xfs_btree_key *
511 struct xfs_btree_cur *cur,
513 struct xfs_btree_block *block)
515 return (union xfs_btree_key *)
516 ((char *)block + xfs_btree_key_offset(cur, n));
520 * Return a pointer to the n-th block pointer in the btree block.
522 STATIC union xfs_btree_ptr *
524 struct xfs_btree_cur *cur,
526 struct xfs_btree_block *block)
528 int level = xfs_btree_get_level(block);
530 ASSERT(block->bb_level != 0);
532 return (union xfs_btree_ptr *)
533 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
537 * Get a the root block which is stored in the inode.
539 * For now this btree implementation assumes the btree root is always
540 * stored in the if_broot field of an inode fork.
542 STATIC struct xfs_btree_block *
544 struct xfs_btree_cur *cur)
546 struct xfs_ifork *ifp;
548 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
549 return (struct xfs_btree_block *)ifp->if_broot;
553 * Retrieve the block pointer from the cursor at the given level.
554 * This may be an inode btree root or from a buffer.
556 STATIC struct xfs_btree_block * /* generic btree block pointer */
558 struct xfs_btree_cur *cur, /* btree cursor */
559 int level, /* level in btree */
560 struct xfs_buf **bpp) /* buffer containing the block */
562 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
563 (level == cur->bc_nlevels - 1)) {
565 return xfs_btree_get_iroot(cur);
568 *bpp = cur->bc_bufs[level];
569 return XFS_BUF_TO_BLOCK(*bpp);
573 * Get a buffer for the block, return it with no data read.
574 * Long-form addressing.
576 xfs_buf_t * /* buffer for fsbno */
578 xfs_mount_t *mp, /* file system mount point */
579 xfs_trans_t *tp, /* transaction pointer */
580 xfs_fsblock_t fsbno, /* file system block number */
581 uint lock) /* lock flags for get_buf */
583 xfs_buf_t *bp; /* buffer pointer (return value) */
584 xfs_daddr_t d; /* real disk block address */
586 ASSERT(fsbno != NULLFSBLOCK);
587 d = XFS_FSB_TO_DADDR(mp, fsbno);
588 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
590 ASSERT(!XFS_BUF_GETERROR(bp));
595 * Get a buffer for the block, return it with no data read.
596 * Short-form addressing.
598 xfs_buf_t * /* buffer for agno/agbno */
600 xfs_mount_t *mp, /* file system mount point */
601 xfs_trans_t *tp, /* transaction pointer */
602 xfs_agnumber_t agno, /* allocation group number */
603 xfs_agblock_t agbno, /* allocation group block number */
604 uint lock) /* lock flags for get_buf */
606 xfs_buf_t *bp; /* buffer pointer (return value) */
607 xfs_daddr_t d; /* real disk block address */
609 ASSERT(agno != NULLAGNUMBER);
610 ASSERT(agbno != NULLAGBLOCK);
611 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
612 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
614 ASSERT(!XFS_BUF_GETERROR(bp));
619 * Check for the cursor referring to the last block at the given level.
621 int /* 1=is last block, 0=not last block */
622 xfs_btree_islastblock(
623 xfs_btree_cur_t *cur, /* btree cursor */
624 int level) /* level to check */
626 xfs_btree_block_t *block; /* generic btree block pointer */
627 xfs_buf_t *bp; /* buffer containing block */
629 block = xfs_btree_get_block(cur, level, &bp);
630 xfs_btree_check_block(cur, block, level, bp);
631 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
632 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
634 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
638 * Change the cursor to point to the first record at the given level.
639 * Other levels are unaffected.
641 int /* success=1, failure=0 */
643 xfs_btree_cur_t *cur, /* btree cursor */
644 int level) /* level to change */
646 xfs_btree_block_t *block; /* generic btree block pointer */
647 xfs_buf_t *bp; /* buffer containing block */
650 * Get the block pointer for this level.
652 block = xfs_btree_get_block(cur, level, &bp);
653 xfs_btree_check_block(cur, block, level, bp);
655 * It's empty, there is no such record.
657 if (!block->bb_numrecs)
660 * Set the ptr value to 1, that's the first record/key.
662 cur->bc_ptrs[level] = 1;
667 * Change the cursor to point to the last record in the current block
668 * at the given level. Other levels are unaffected.
670 int /* success=1, failure=0 */
672 xfs_btree_cur_t *cur, /* btree cursor */
673 int level) /* level to change */
675 xfs_btree_block_t *block; /* generic btree block pointer */
676 xfs_buf_t *bp; /* buffer containing block */
679 * Get the block pointer for this level.
681 block = xfs_btree_get_block(cur, level, &bp);
682 xfs_btree_check_block(cur, block, level, bp);
684 * It's empty, there is no such record.
686 if (!block->bb_numrecs)
689 * Set the ptr value to numrecs, that's the last record/key.
691 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
696 * Compute first and last byte offsets for the fields given.
697 * Interprets the offsets table, which contains struct field offsets.
701 __int64_t fields, /* bitmask of fields */
702 const short *offsets, /* table of field offsets */
703 int nbits, /* number of bits to inspect */
704 int *first, /* output: first byte offset */
705 int *last) /* output: last byte offset */
707 int i; /* current bit number */
708 __int64_t imask; /* mask for current bit number */
712 * Find the lowest bit, so the first byte offset.
714 for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
715 if (imask & fields) {
721 * Find the highest bit, so the last byte offset.
723 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
724 if (imask & fields) {
725 *last = offsets[i + 1] - 1;
732 * Get a buffer for the block, return it read in.
733 * Long-form addressing.
737 xfs_mount_t *mp, /* file system mount point */
738 xfs_trans_t *tp, /* transaction pointer */
739 xfs_fsblock_t fsbno, /* file system block number */
740 uint lock, /* lock flags for read_buf */
741 xfs_buf_t **bpp, /* buffer for fsbno */
742 int refval) /* ref count value for buffer */
744 xfs_buf_t *bp; /* return value */
745 xfs_daddr_t d; /* real disk block address */
748 ASSERT(fsbno != NULLFSBLOCK);
749 d = XFS_FSB_TO_DADDR(mp, fsbno);
750 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
751 mp->m_bsize, lock, &bp))) {
754 ASSERT(!bp || !XFS_BUF_GETERROR(bp));
756 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
763 * Get a buffer for the block, return it read in.
764 * Short-form addressing.
768 xfs_mount_t *mp, /* file system mount point */
769 xfs_trans_t *tp, /* transaction pointer */
770 xfs_agnumber_t agno, /* allocation group number */
771 xfs_agblock_t agbno, /* allocation group block number */
772 uint lock, /* lock flags for read_buf */
773 xfs_buf_t **bpp, /* buffer for agno/agbno */
774 int refval) /* ref count value for buffer */
776 xfs_buf_t *bp; /* return value */
777 xfs_daddr_t d; /* real disk block address */
780 ASSERT(agno != NULLAGNUMBER);
781 ASSERT(agbno != NULLAGBLOCK);
782 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
783 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
784 mp->m_bsize, lock, &bp))) {
787 ASSERT(!bp || !XFS_BUF_GETERROR(bp));
790 case XFS_ALLOC_BTREE_REF:
791 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
793 case XFS_INO_BTREE_REF:
794 XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
803 * Read-ahead the block, don't wait for it, don't return a buffer.
804 * Long-form addressing.
808 xfs_btree_reada_bufl(
809 xfs_mount_t *mp, /* file system mount point */
810 xfs_fsblock_t fsbno, /* file system block number */
811 xfs_extlen_t count) /* count of filesystem blocks */
815 ASSERT(fsbno != NULLFSBLOCK);
816 d = XFS_FSB_TO_DADDR(mp, fsbno);
817 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
821 * Read-ahead the block, don't wait for it, don't return a buffer.
822 * Short-form addressing.
826 xfs_btree_reada_bufs(
827 xfs_mount_t *mp, /* file system mount point */
828 xfs_agnumber_t agno, /* allocation group number */
829 xfs_agblock_t agbno, /* allocation group block number */
830 xfs_extlen_t count) /* count of filesystem blocks */
834 ASSERT(agno != NULLAGNUMBER);
835 ASSERT(agbno != NULLAGBLOCK);
836 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
837 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
841 xfs_btree_readahead_lblock(
842 struct xfs_btree_cur *cur,
844 struct xfs_btree_block *block)
847 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
848 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
850 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
851 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
855 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
856 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
864 xfs_btree_readahead_sblock(
865 struct xfs_btree_cur *cur,
867 struct xfs_btree_block *block)
870 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
871 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
874 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
875 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
880 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
881 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
890 * Read-ahead btree blocks, at the given level.
891 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
895 struct xfs_btree_cur *cur, /* btree cursor */
896 int lev, /* level in btree */
897 int lr) /* left/right bits */
899 struct xfs_btree_block *block;
902 * No readahead needed if we are at the root level and the
903 * btree root is stored in the inode.
905 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
906 (lev == cur->bc_nlevels - 1))
909 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
912 cur->bc_ra[lev] |= lr;
913 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
915 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
916 return xfs_btree_readahead_lblock(cur, lr, block);
917 return xfs_btree_readahead_sblock(cur, lr, block);
921 * Set the buffer for level "lev" in the cursor to bp, releasing
922 * any previous buffer.
926 xfs_btree_cur_t *cur, /* btree cursor */
927 int lev, /* level in btree */
928 xfs_buf_t *bp) /* new buffer to set */
930 xfs_btree_block_t *b; /* btree block */
931 xfs_buf_t *obp; /* old buffer pointer */
933 obp = cur->bc_bufs[lev];
935 xfs_trans_brelse(cur->bc_tp, obp);
936 cur->bc_bufs[lev] = bp;
940 b = XFS_BUF_TO_BLOCK(bp);
941 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
942 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
943 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
944 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
945 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
947 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
948 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
949 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
950 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
955 xfs_btree_ptr_is_null(
956 struct xfs_btree_cur *cur,
957 union xfs_btree_ptr *ptr)
959 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
960 return be64_to_cpu(ptr->l) == NULLFSBLOCK;
962 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
966 * Get/set/init sibling pointers
969 xfs_btree_get_sibling(
970 struct xfs_btree_cur *cur,
971 struct xfs_btree_block *block,
972 union xfs_btree_ptr *ptr,
975 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
977 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
978 if (lr == XFS_BB_RIGHTSIB)
979 ptr->l = block->bb_u.l.bb_rightsib;
981 ptr->l = block->bb_u.l.bb_leftsib;
983 if (lr == XFS_BB_RIGHTSIB)
984 ptr->s = block->bb_u.s.bb_rightsib;
986 ptr->s = block->bb_u.s.bb_leftsib;
991 xfs_btree_ptr_to_daddr(
992 struct xfs_btree_cur *cur,
993 union xfs_btree_ptr *ptr)
995 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
996 ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
998 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
1000 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
1001 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
1003 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
1004 be32_to_cpu(ptr->s));
1010 struct xfs_btree_cur *cur,
1013 switch (cur->bc_btnum) {
1016 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
1019 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
1021 case XFS_BTNUM_BMAP:
1022 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1030 * Read in the buffer at the given ptr and return the buffer and
1031 * the block pointer within the buffer.
1034 xfs_btree_read_buf_block(
1035 struct xfs_btree_cur *cur,
1036 union xfs_btree_ptr *ptr,
1039 struct xfs_btree_block **block,
1040 struct xfs_buf **bpp)
1042 struct xfs_mount *mp = cur->bc_mp;
1046 /* need to sort out how callers deal with failures first */
1047 ASSERT(!(flags & XFS_BUF_TRYLOCK));
1049 d = xfs_btree_ptr_to_daddr(cur, ptr);
1050 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1051 mp->m_bsize, flags, bpp);
1055 ASSERT(*bpp != NULL);
1056 ASSERT(!XFS_BUF_GETERROR(*bpp));
1058 xfs_btree_set_refs(cur, *bpp);
1059 *block = XFS_BUF_TO_BLOCK(*bpp);
1061 error = xfs_btree_check_block(cur, *block, level, *bpp);
1063 xfs_trans_brelse(cur->bc_tp, *bpp);
1068 * Increment cursor by one record at the level.
1069 * For nonzero levels the leaf-ward information is untouched.
1072 xfs_btree_increment(
1073 struct xfs_btree_cur *cur,
1075 int *stat) /* success/failure */
1077 struct xfs_btree_block *block;
1078 union xfs_btree_ptr ptr;
1080 int error; /* error return value */
1083 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1084 XFS_BTREE_TRACE_ARGI(cur, level);
1086 ASSERT(level < cur->bc_nlevels);
1088 /* Read-ahead to the right at this level. */
1089 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1091 /* Get a pointer to the btree block. */
1092 block = xfs_btree_get_block(cur, level, &bp);
1095 error = xfs_btree_check_block(cur, block, level, bp);
1100 /* We're done if we remain in the block after the increment. */
1101 if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1104 /* Fail if we just went off the right edge of the tree. */
1105 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1106 if (xfs_btree_ptr_is_null(cur, &ptr))
1109 XFS_BTREE_STATS_INC(cur, increment);
1112 * March up the tree incrementing pointers.
1113 * Stop when we don't go off the right edge of a block.
1115 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1116 block = xfs_btree_get_block(cur, lev, &bp);
1119 error = xfs_btree_check_block(cur, block, lev, bp);
1124 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1127 /* Read-ahead the right block for the next loop. */
1128 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1132 * If we went off the root then we are either seriously
1133 * confused or have the tree root in an inode.
1135 if (lev == cur->bc_nlevels) {
1136 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1139 error = EFSCORRUPTED;
1142 ASSERT(lev < cur->bc_nlevels);
1145 * Now walk back down the tree, fixing up the cursor's buffer
1146 * pointers and key numbers.
1148 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1149 union xfs_btree_ptr *ptrp;
1151 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1152 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1157 xfs_btree_setbuf(cur, lev, bp);
1158 cur->bc_ptrs[lev] = 1;
1161 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1166 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1171 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);