[XFS] split up xlog_recover_process_iunlinks
[safe/jmp/linux-2.6] / fs / xfs / xfs_ialloc.c
index 0298969..16eda31 100644 (file)
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
-
-#include "xfs_macros.h"
+#include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_inum.h"
+#include "xfs_bit.h"
 #include "xfs_log.h"
+#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
 #include "xfs_alloc.h"
-#include "xfs_bit.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_bmap.h"
 
+
 /*
- * Log specified fields for the inode given by bp and off.
+ * Allocation group level functions.
  */
-STATIC void
-xfs_ialloc_log_di(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_buf_t       *bp,            /* inode buffer */
-       int             off,            /* index of inode in buffer */
-       int             fields)         /* bitmask of fields to log */
+static inline int
+xfs_ialloc_cluster_alignment(
+       xfs_alloc_arg_t *args)
 {
-       int                     first;          /* first byte number */
-       int                     ioffset;        /* off in bytes */
-       int                     last;           /* last byte number */
-       xfs_mount_t             *mp;            /* mount point structure */
-       static const short      offsets[] = {   /* field offsets */
-                                               /* keep in sync with bits */
-               offsetof(xfs_dinode_core_t, di_magic),
-               offsetof(xfs_dinode_core_t, di_mode),
-               offsetof(xfs_dinode_core_t, di_version),
-               offsetof(xfs_dinode_core_t, di_format),
-               offsetof(xfs_dinode_core_t, di_onlink),
-               offsetof(xfs_dinode_core_t, di_uid),
-               offsetof(xfs_dinode_core_t, di_gid),
-               offsetof(xfs_dinode_core_t, di_nlink),
-               offsetof(xfs_dinode_core_t, di_projid),
-               offsetof(xfs_dinode_core_t, di_pad),
-               offsetof(xfs_dinode_core_t, di_atime),
-               offsetof(xfs_dinode_core_t, di_mtime),
-               offsetof(xfs_dinode_core_t, di_ctime),
-               offsetof(xfs_dinode_core_t, di_size),
-               offsetof(xfs_dinode_core_t, di_nblocks),
-               offsetof(xfs_dinode_core_t, di_extsize),
-               offsetof(xfs_dinode_core_t, di_nextents),
-               offsetof(xfs_dinode_core_t, di_anextents),
-               offsetof(xfs_dinode_core_t, di_forkoff),
-               offsetof(xfs_dinode_core_t, di_aformat),
-               offsetof(xfs_dinode_core_t, di_dmevmask),
-               offsetof(xfs_dinode_core_t, di_dmstate),
-               offsetof(xfs_dinode_core_t, di_flags),
-               offsetof(xfs_dinode_core_t, di_gen),
-               offsetof(xfs_dinode_t, di_next_unlinked),
-               offsetof(xfs_dinode_t, di_u),
-               offsetof(xfs_dinode_t, di_a),
-               sizeof(xfs_dinode_t)
-       };
+       if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
+           args->mp->m_sb.sb_inoalignmt >=
+            XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
+               return args->mp->m_sb.sb_inoalignmt;
+       return 1;
+}
 
+/*
+ * Lookup the record equal to ino in the btree given by cur.
+ */
+STATIC int                             /* error */
+xfs_inobt_lookup_eq(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             ino,    /* starting inode of chunk */
+       __int32_t               fcnt,   /* free inode count */
+       xfs_inofree_t           free,   /* free inode mask */
+       int                     *stat)  /* success/failure */
+{
+       cur->bc_rec.i.ir_startino = ino;
+       cur->bc_rec.i.ir_freecount = fcnt;
+       cur->bc_rec.i.ir_free = free;
+       return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
 
-       ASSERT(offsetof(xfs_dinode_t, di_core) == 0);
-       ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0);
-       mp = tp->t_mountp;
-       /*
-        * Get the inode-relative first and last bytes for these fields
-        */
-       xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last);
-       /*
-        * Convert to buffer offsets and log it.
-        */
-       ioffset = off << mp->m_sb.sb_inodelog;
-       first += ioffset;
-       last += ioffset;
-       xfs_trans_log_buf(tp, bp, first, last);
+/*
+ * Lookup the first record greater than or equal to ino
+ * in the btree given by cur.
+ */
+int                                    /* error */
+xfs_inobt_lookup_ge(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             ino,    /* starting inode of chunk */
+       __int32_t               fcnt,   /* free inode count */
+       xfs_inofree_t           free,   /* free inode mask */
+       int                     *stat)  /* success/failure */
+{
+       cur->bc_rec.i.ir_startino = ino;
+       cur->bc_rec.i.ir_freecount = fcnt;
+       cur->bc_rec.i.ir_free = free;
+       return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 }
 
 /*
- * Allocation group level functions.
+ * Lookup the first record less than or equal to ino
+ * in the btree given by cur.
  */
+int                                    /* error */
+xfs_inobt_lookup_le(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             ino,    /* starting inode of chunk */
+       __int32_t               fcnt,   /* free inode count */
+       xfs_inofree_t           free,   /* free inode mask */
+       int                     *stat)  /* success/failure */
+{
+       cur->bc_rec.i.ir_startino = ino;
+       cur->bc_rec.i.ir_freecount = fcnt;
+       cur->bc_rec.i.ir_free = free;
+       return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [ino, fcnt, free].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int                             /* error */
+xfs_inobt_update(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             ino,    /* starting inode of chunk */
+       __int32_t               fcnt,   /* free inode count */
+       xfs_inofree_t           free)   /* free inode mask */
+{
+       union xfs_btree_rec     rec;
+
+       rec.inobt.ir_startino = cpu_to_be32(ino);
+       rec.inobt.ir_freecount = cpu_to_be32(fcnt);
+       rec.inobt.ir_free = cpu_to_be64(free);
+       return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int                                    /* error */
+xfs_inobt_get_rec(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             *ino,   /* output: starting inode of chunk */
+       __int32_t               *fcnt,  /* output: number of free inodes */
+       xfs_inofree_t           *free,  /* output: free inode mask */
+       int                     *stat)  /* output: success/failure */
+{
+       union xfs_btree_rec     *rec;
+       int                     error;
+
+       error = xfs_btree_get_rec(cur, &rec, stat);
+       if (!error && *stat == 1) {
+               *ino = be32_to_cpu(rec->inobt.ir_startino);
+               *fcnt = be32_to_cpu(rec->inobt.ir_freecount);
+               *free = be64_to_cpu(rec->inobt.ir_free);
+       }
+       return error;
+}
 
 /*
  * Allocate new inodes in the allocation group specified by agbp.
@@ -141,6 +167,7 @@ xfs_ialloc_ag_alloc(
        int             blks_per_cluster;  /* fs blocks per inode cluster */
        xfs_btree_cur_t *cur;           /* inode btree cursor */
        xfs_daddr_t     d;              /* disk addr of buffer */
+       xfs_agnumber_t  agno;
        int             error;
        xfs_buf_t       *fbuf;          /* new free inodes' buffer */
        xfs_dinode_t    *free;          /* new free inode structure */
@@ -152,10 +179,9 @@ xfs_ialloc_ag_alloc(
        int             ninodes;        /* num inodes per buf */
        xfs_agino_t     thisino;        /* current inode number, for loop */
        int             version;        /* inode version number to use */
-       int             isaligned;      /* inode allocation at stripe unit */
+       int             isaligned = 0;  /* inode allocation at stripe unit */
                                        /* boundary */
-       xfs_dinode_core_t dic;          /* a dinode_core to copy to new */
-                                       /* inodes */
+       unsigned int    gen;
 
        args.tp = tp;
        args.mp = tp->t_mountp;
@@ -170,46 +196,84 @@ xfs_ialloc_ag_alloc(
                return XFS_ERROR(ENOSPC);
        args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
        /*
-        * Set the alignment for the allocation.
-        * If stripe alignment is turned on then align at stripe unit
-        * boundary.
-        * If the cluster size is smaller than a filesystem block
-        * then we're doing I/O for inodes in filesystem block size pieces,
-        * so don't need alignment anyway.
-        */
-       isaligned = 0;
-       if (args.mp->m_sinoalign) {
-               ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
-               args.alignment = args.mp->m_dalign;
-               isaligned = 1;
-       } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
-           args.mp->m_sb.sb_inoalignmt >=
-           XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
-               args.alignment = args.mp->m_sb.sb_inoalignmt;
-       else
-               args.alignment = 1;
+        * First try to allocate inodes contiguous with the last-allocated
+        * chunk of inodes.  If the filesystem is striped, this will fill
+        * an entire stripe unit with inodes.
+        */
        agi = XFS_BUF_TO_AGI(agbp);
-       /*
-        * Need to figure out where to allocate the inode blocks.
-        * Ideally they should be spaced out through the a.g.
-        * For now, just allocate blocks up front.
-        */
-       args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
-       args.fsbno = XFS_AGB_TO_FSB(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
-                                   args.agbno);
-       /*
-        * Allocate a fixed-size extent of inodes.
-        */
-       args.type = XFS_ALLOCTYPE_NEAR_BNO;
-       args.mod = args.total = args.wasdel = args.isfl = args.userdata =
-               args.minalignslop = 0;
-       args.prod = 1;
-       /*
-        * Allow space for the inode btree to split.
-        */
-       args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
-       if ((error = xfs_alloc_vextent(&args)))
-               return error;
+       newino = be32_to_cpu(agi->agi_newino);
+       args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
+                       XFS_IALLOC_BLOCKS(args.mp);
+       if (likely(newino != NULLAGINO &&
+                 (args.agbno < be32_to_cpu(agi->agi_length)))) {
+               args.fsbno = XFS_AGB_TO_FSB(args.mp,
+                               be32_to_cpu(agi->agi_seqno), args.agbno);
+               args.type = XFS_ALLOCTYPE_THIS_BNO;
+               args.mod = args.total = args.wasdel = args.isfl =
+                       args.userdata = args.minalignslop = 0;
+               args.prod = 1;
+
+               /*
+                * We need to take into account alignment here to ensure that
+                * we don't modify the free list if we fail to have an exact
+                * block. If we don't have an exact match, and every oher
+                * attempt allocation attempt fails, we'll end up cancelling
+                * a dirty transaction and shutting down.
+                *
+                * For an exact allocation, alignment must be 1,
+                * however we need to take cluster alignment into account when
+                * fixing up the freelist. Use the minalignslop field to
+                * indicate that extra blocks might be required for alignment,
+                * but not to use them in the actual exact allocation.
+                */
+               args.alignment = 1;
+               args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
+
+               /* Allow space for the inode btree to split. */
+               args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+       } else
+               args.fsbno = NULLFSBLOCK;
+
+       if (unlikely(args.fsbno == NULLFSBLOCK)) {
+               /*
+                * Set the alignment for the allocation.
+                * If stripe alignment is turned on then align at stripe unit
+                * boundary.
+                * If the cluster size is smaller than a filesystem block
+                * then we're doing I/O for inodes in filesystem block size
+                * pieces, so don't need alignment anyway.
+                */
+               isaligned = 0;
+               if (args.mp->m_sinoalign) {
+                       ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
+                       args.alignment = args.mp->m_dalign;
+                       isaligned = 1;
+               } else
+                       args.alignment = xfs_ialloc_cluster_alignment(&args);
+               /*
+                * Need to figure out where to allocate the inode blocks.
+                * Ideally they should be spaced out through the a.g.
+                * For now, just allocate blocks up front.
+                */
+               args.agbno = be32_to_cpu(agi->agi_root);
+               args.fsbno = XFS_AGB_TO_FSB(args.mp,
+                               be32_to_cpu(agi->agi_seqno), args.agbno);
+               /*
+                * Allocate a fixed-size extent of inodes.
+                */
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+               args.mod = args.total = args.wasdel = args.isfl =
+                       args.userdata = args.minalignslop = 0;
+               args.prod = 1;
+               /*
+                * Allow space for the inode btree to split.
+                */
+               args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+       }
 
        /*
         * If stripe alignment is turned on, then try again with cluster
@@ -217,15 +281,10 @@ xfs_ialloc_ag_alloc(
         */
        if (isaligned && args.fsbno == NULLFSBLOCK) {
                args.type = XFS_ALLOCTYPE_NEAR_BNO;
-               args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+               args.agbno = be32_to_cpu(agi->agi_root);
                args.fsbno = XFS_AGB_TO_FSB(args.mp,
-                               INT_GET(agi->agi_seqno, ARCH_CONVERT), args.agbno);
-               if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
-                       args.mp->m_sb.sb_inoalignmt >=
-                       XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
-                               args.alignment = args.mp->m_sb.sb_inoalignmt;
-               else
-                       args.alignment = 1;
+                               be32_to_cpu(agi->agi_seqno), args.agbno);
+               args.alignment = xfs_ialloc_cluster_alignment(&args);
                if ((error = xfs_alloc_vextent(&args)))
                        return error;
        }
@@ -261,51 +320,63 @@ xfs_ialloc_ag_alloc(
         * use the old version so that old kernels will continue to be
         * able to use the file system.
         */
-       if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb))
-               version = XFS_DINODE_VERSION_2;
+       if (xfs_sb_version_hasnlink(&args.mp->m_sb))
+               version = 2;
        else
-               version = XFS_DINODE_VERSION_1;
-
-       memset(&dic, 0, sizeof(xfs_dinode_core_t));
-       INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
-       INT_SET(dic.di_version, ARCH_CONVERT, version);
+               version = 1;
 
+       /*
+        * Seed the new inode cluster with a random generation number. This
+        * prevents short-term reuse of generation numbers if a chunk is
+        * freed and then immediately reallocated. We use random numbers
+        * rather than a linear progression to prevent the next generation
+        * number from being easily guessable.
+        */
+       gen = random32();
        for (j = 0; j < nbufs; j++) {
                /*
                 * Get the block.
                 */
-               d = XFS_AGB_TO_DADDR(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+               d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
                                     args.agbno + (j * blks_per_cluster));
                fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
                                         args.mp->m_bsize * blks_per_cluster,
                                         XFS_BUF_LOCK);
                ASSERT(fbuf);
                ASSERT(!XFS_BUF_GETERROR(fbuf));
+
                /*
-                * Loop over the inodes in this buffer.
+                * Initialize all inodes in this buffer and then log them.
+                *
+                * XXX: It would be much better if we had just one transaction to
+                *      log a whole cluster of inodes instead of all the indivdual
+                *      transactions causing a lot of log traffic.
                 */
-
+               xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
                for (i = 0; i < ninodes; i++) {
+                       int     ioffset = i << args.mp->m_sb.sb_inodelog;
+                       uint    isize = sizeof(struct xfs_dinode);
+
                        free = XFS_MAKE_IPTR(args.mp, fbuf, i);
-                       memcpy(&(free->di_core), &dic, sizeof(xfs_dinode_core_t));
-                       INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
-                       xfs_ialloc_log_di(tp, fbuf, i,
-                               XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
+                       free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
+                       free->di_version = version;
+                       free->di_gen = cpu_to_be32(gen);
+                       free->di_next_unlinked = cpu_to_be32(NULLAGINO);
+                       xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
                }
                xfs_trans_inode_alloc_buf(tp, fbuf);
        }
-       INT_MOD(agi->agi_count, ARCH_CONVERT, newlen);
-       INT_MOD(agi->agi_freecount, ARCH_CONVERT, newlen);
+       be32_add_cpu(&agi->agi_count, newlen);
+       be32_add_cpu(&agi->agi_freecount, newlen);
+       agno = be32_to_cpu(agi->agi_seqno);
        down_read(&args.mp->m_peraglock);
-       args.mp->m_perag[INT_GET(agi->agi_seqno, ARCH_CONVERT)].pagi_freecount += newlen;
+       args.mp->m_perag[agno].pagi_freecount += newlen;
        up_read(&args.mp->m_peraglock);
-       INT_SET(agi->agi_newino, ARCH_CONVERT, newino);
+       agi->agi_newino = cpu_to_be32(newino);
        /*
         * Insert records describing the new inode chunk into the btree.
         */
-       cur = xfs_btree_init_cursor(args.mp, tp, agbp,
-                       INT_GET(agi->agi_seqno, ARCH_CONVERT),
-                       XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+       cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
        for (thisino = newino;
             thisino < newino + newlen;
             thisino += XFS_INODES_PER_CHUNK) {
@@ -315,7 +386,7 @@ xfs_ialloc_ag_alloc(
                        return error;
                }
                ASSERT(i == 0);
-               if ((error = xfs_inobt_insert(cur, &i))) {
+               if ((error = xfs_btree_insert(cur, &i))) {
                        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
                        return error;
                }
@@ -336,7 +407,7 @@ xfs_ialloc_ag_alloc(
        return 0;
 }
 
-STATIC __inline xfs_agnumber_t
+STATIC_INLINE xfs_agnumber_t
 xfs_ialloc_next_ag(
        xfs_mount_t     *mp)
 {
@@ -452,7 +523,7 @@ nextag:
                 */
                if (XFS_FORCED_SHUTDOWN(mp)) {
                        up_read(&mp->m_peraglock);
-                       return (xfs_buf_t *)0;
+                       return NULL;
                }
                agno++;
                if (agno >= agcount)
@@ -460,7 +531,7 @@ nextag:
                if (agno == pagno) {
                        if (flags == 0) {
                                up_read(&mp->m_peraglock);
-                               return (xfs_buf_t *)0;
+                               return NULL;
                        }
                        flags = 0;
                }
@@ -523,10 +594,10 @@ xfs_dialloc(
        int             offset;         /* index of inode in chunk */
        xfs_agino_t     pagino;         /* parent's a.g. relative inode # */
        xfs_agnumber_t  pagno;          /* parent's allocation group number */
-       xfs_inobt_rec_t rec;            /* inode allocation record */
+       xfs_inobt_rec_incore_t rec;     /* inode allocation record */
        xfs_agnumber_t  tagno;          /* testing allocation group number */
        xfs_btree_cur_t *tcur;          /* temp cursor */
-       xfs_inobt_rec_t trec;           /* temp inode allocation record */
+       xfs_inobt_rec_incore_t trec;    /* temp inode allocation record */
 
 
        if (*IO_agbp == NULL) {
@@ -544,7 +615,7 @@ xfs_dialloc(
                        return 0;
                }
                agi = XFS_BUF_TO_AGI(agbp);
-               ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+               ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
        } else {
                /*
                 * Continue where we left off before.  In this case, we
@@ -552,12 +623,12 @@ xfs_dialloc(
                 */
                agbp = *IO_agbp;
                agi = XFS_BUF_TO_AGI(agbp);
-               ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
-               ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0);
+               ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+               ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
        }
        mp = tp->t_mountp;
        agcount = mp->m_sb.sb_agcount;
-       agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
+       agno = be32_to_cpu(agi->agi_seqno);
        tagno = agno;
        pagno = XFS_INO_TO_AGNO(mp, parent);
        pagino = XFS_INO_TO_AGINO(mp, parent);
@@ -605,7 +676,7 @@ xfs_dialloc(
                                 * can commit the current transaction and call
                                 * us again where we left off.
                                 */
-                               ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0);
+                               ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
                                *alloc_done = B_TRUE;
                                *IO_agbp = agbp;
                                *inop = NULLFSINO;
@@ -636,7 +707,7 @@ nextag:
                if (error)
                        goto nextag;
                agi = XFS_BUF_TO_AGI(agbp);
-               ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+               ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
        }
        /*
         * Here with an allocation group that has a free inode.
@@ -645,14 +716,13 @@ nextag:
         */
        agno = tagno;
        *IO_agbp = NULL;
-       cur = xfs_btree_init_cursor(mp, tp, agbp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
-                                   XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+       cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
        /*
         * If pagino is 0 (this is the root inode allocation) use newino.
         * This must work because we've just allocated some.
         */
        if (!pagino)
-               pagino = INT_GET(agi->agi_newino, ARCH_CONVERT);
+               pagino = be32_to_cpu(agi->agi_newino);
 #ifdef DEBUG
        if (cur->bc_nlevels == 1) {
                int     freecount = 0;
@@ -666,11 +736,11 @@ nextag:
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
                        freecount += rec.ir_freecount;
-                       if ((error = xfs_inobt_increment(cur, 0, &i)))
+                       if ((error = xfs_btree_increment(cur, 0, &i)))
                                goto error0;
                } while (i == 1);
 
-               ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+               ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
                       XFS_FORCED_SHUTDOWN(mp));
        }
 #endif
@@ -710,7 +780,7 @@ nextag:
                        /*
                         * Search left with tcur, back up 1 record.
                         */
-                       if ((error = xfs_inobt_decrement(tcur, 0, &i)))
+                       if ((error = xfs_btree_decrement(tcur, 0, &i)))
                                goto error1;
                        doneleft = !i;
                        if (!doneleft) {
@@ -724,7 +794,7 @@ nextag:
                        /*
                         * Search right with cur, go forward 1 record.
                         */
-                       if ((error = xfs_inobt_increment(cur, 0, &i)))
+                       if ((error = xfs_btree_increment(cur, 0, &i)))
                                goto error1;
                        doneright = !i;
                        if (!doneright) {
@@ -786,7 +856,7 @@ nextag:
                                 * further left.
                                 */
                                if (useleft) {
-                                       if ((error = xfs_inobt_decrement(tcur, 0,
+                                       if ((error = xfs_btree_decrement(tcur, 0,
                                                        &i)))
                                                goto error1;
                                        doneleft = !i;
@@ -806,7 +876,7 @@ nextag:
                                 * further right.
                                 */
                                else {
-                                       if ((error = xfs_inobt_increment(cur, 0,
+                                       if ((error = xfs_btree_increment(cur, 0,
                                                        &i)))
                                                goto error1;
                                        doneright = !i;
@@ -829,9 +899,9 @@ nextag:
         * In a different a.g. from the parent.
         * See if the most recently allocated block has any free.
         */
-       else if (INT_GET(agi->agi_newino, ARCH_CONVERT) != NULLAGINO) {
+       else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
                if ((error = xfs_inobt_lookup_eq(cur,
-                               INT_GET(agi->agi_newino, ARCH_CONVERT), 0, 0, &i)))
+                               be32_to_cpu(agi->agi_newino), 0, 0, &i)))
                        goto error0;
                if (i == 1 &&
                    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
@@ -861,7 +931,7 @@ nextag:
                                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
                                if (rec.ir_freecount > 0)
                                        break;
-                               if ((error = xfs_inobt_increment(cur, 0, &i)))
+                               if ((error = xfs_btree_increment(cur, 0, &i)))
                                        goto error0;
                                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
                        }
@@ -878,7 +948,7 @@ nextag:
        if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
                        rec.ir_free)))
                goto error0;
-       INT_MOD(agi->agi_freecount, ARCH_CONVERT, -1);
+       be32_add_cpu(&agi->agi_freecount, -1);
        xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
        down_read(&mp->m_peraglock);
        mp->m_perag[tagno].pagi_freecount--;
@@ -895,10 +965,10 @@ nextag:
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
                        freecount += rec.ir_freecount;
-                       if ((error = xfs_inobt_increment(cur, 0, &i)))
+                       if ((error = xfs_btree_increment(cur, 0, &i)))
                                goto error0;
                } while (i == 1);
-               ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+               ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
                       XFS_FORCED_SHUTDOWN(mp));
        }
 #endif
@@ -939,7 +1009,7 @@ xfs_difree(
        int             ilen;   /* inodes in an inode cluster */
        xfs_mount_t     *mp;    /* mount structure for filesystem */
        int             off;    /* offset of inode in inode chunk */
-       xfs_inobt_rec_t rec;    /* btree record */
+       xfs_inobt_rec_incore_t rec;     /* btree record */
 
        mp = tp->t_mountp;
 
@@ -986,13 +1056,12 @@ xfs_difree(
                return error;
        }
        agi = XFS_BUF_TO_AGI(agbp);
-       ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
-       ASSERT(agbno < INT_GET(agi->agi_length, ARCH_CONVERT));
+       ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+       ASSERT(agbno < be32_to_cpu(agi->agi_length));
        /*
         * Initialize the cursor.
         */
-       cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
-               (xfs_inode_t *)0, 0);
+       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
 #ifdef DEBUG
        if (cur->bc_nlevels == 1) {
                int freecount = 0;
@@ -1005,11 +1074,11 @@ xfs_difree(
                                goto error0;
                        if (i) {
                                freecount += rec.ir_freecount;
-                               if ((error = xfs_inobt_increment(cur, 0, &i)))
+                               if ((error = xfs_btree_increment(cur, 0, &i)))
                                        goto error0;
                        }
                } while (i == 1);
-               ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+               ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
                       XFS_FORCED_SHUTDOWN(mp));
        }
 #endif
@@ -1044,9 +1113,9 @@ xfs_difree(
        rec.ir_freecount++;
 
        /*
-        * When an inode cluster is free, it becomes elgible for removal
+        * When an inode cluster is free, it becomes eligible for removal
         */
-       if ((mp->m_flags & XFS_MOUNT_IDELETE) &&
+       if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
            (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
 
                *delete = 1;
@@ -1058,8 +1127,8 @@ xfs_difree(
                 * to be freed when the transaction is committed.
                 */
                ilen = XFS_IALLOC_INODES(mp);
-               INT_MOD(agi->agi_count, ARCH_CONVERT, -ilen);
-               INT_MOD(agi->agi_freecount, ARCH_CONVERT, -(ilen - 1));
+               be32_add_cpu(&agi->agi_count, -ilen);
+               be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
                xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
                down_read(&mp->m_peraglock);
                mp->m_perag[agno].pagi_freecount -= ilen - 1;
@@ -1067,8 +1136,8 @@ xfs_difree(
                xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
                xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
 
-               if ((error = xfs_inobt_delete(cur, &i))) {
-                       cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n",
+               if ((error = xfs_btree_delete(cur, &i))) {
+                       cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n",
                                error, mp->m_fsname);
                        goto error0;
                }
@@ -1088,7 +1157,7 @@ xfs_difree(
                /* 
                 * Change the inode free counts and log the ag/sb changes.
                 */
-               INT_MOD(agi->agi_freecount, ARCH_CONVERT, 1);
+               be32_add_cpu(&agi->agi_freecount, 1);
                xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
                down_read(&mp->m_peraglock);
                mp->m_perag[agno].pagi_freecount++;
@@ -1110,11 +1179,11 @@ xfs_difree(
                                goto error0;
                        if (i) {
                                freecount += rec.ir_freecount;
-                               if ((error = xfs_inobt_increment(cur, 0, &i)))
+                               if ((error = xfs_btree_increment(cur, 0, &i)))
                                        goto error0;
                        }
                } while (i == 1);
-               ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+               ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
                       XFS_FORCED_SHUTDOWN(mp));
        }
 #endif
@@ -1166,6 +1235,9 @@ xfs_dilocate(
        if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
            ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
 #ifdef DEBUG
+               /* no diagnostics for bulkstat, ino comes from userspace */
+               if (flags & XFS_IMAP_BULKSTAT)
+                       return XFS_ERROR(EINVAL);
                if (agno >= mp->m_sb.sb_agcount) {
                        xfs_fs_cmn_err(CE_ALERT, mp,
                                        "xfs_dilocate: agno (%d) >= "
@@ -1186,6 +1258,7 @@ xfs_dilocate(
                                        "(0x%llx)",
                                        ino, XFS_AGINO_TO_INO(mp, agno, agino));
                }
+               xfs_stack_trace();
 #endif /* DEBUG */
                return XFS_ERROR(EINVAL);
        }
@@ -1224,8 +1297,7 @@ xfs_dilocate(
 #endif /* DEBUG */
                        return error;
                }
-               cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
-                       (xfs_inode_t *)0, 0);
+               cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
                if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
 #ifdef DEBUG
                        xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
@@ -1323,7 +1395,7 @@ xfs_ialloc_log_agi(
        xfs_agi_t               *agi;   /* allocation group header */
 
        agi = XFS_BUF_TO_AGI(bp);
-       ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+       ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
 #endif
        /*
         * Compute byte offsets for the first and last fields.
@@ -1335,70 +1407,114 @@ xfs_ialloc_log_agi(
        xfs_trans_log_buf(tp, bp, first, last);
 }
 
+#ifdef DEBUG
+STATIC void
+xfs_check_agi_unlinked(
+       struct xfs_agi          *agi)
+{
+       int                     i;
+
+       for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
+               ASSERT(agi->agi_unlinked[i]);
+}
+#else
+#define xfs_check_agi_unlinked(agi)
+#endif
+
 /*
  * Read in the allocation group header (inode allocation section)
  */
 int
-xfs_ialloc_read_agi(
-       xfs_mount_t     *mp,            /* file system mount structure */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_agnumber_t  agno,           /* allocation group number */
-       xfs_buf_t       **bpp)          /* allocation group hdr buf */
+xfs_read_agi(
+       struct xfs_mount        *mp,    /* file system mount structure */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       struct xfs_buf          **bpp)  /* allocation group hdr buf */
 {
-       xfs_agi_t       *agi;           /* allocation group header */
-       int             agi_ok;         /* agi is consistent */
-       xfs_buf_t       *bp;            /* allocation group hdr buf */
-       xfs_perag_t     *pag;           /* per allocation group data */
-       int             error;
+       struct xfs_agi          *agi;   /* allocation group header */
+       int                     agi_ok; /* agi is consistent */
+       int                     error;
 
        ASSERT(agno != NULLAGNUMBER);
-       error = xfs_trans_read_buf(
-                       mp, tp, mp->m_ddev_targp,
+
+       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                        XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
-                       XFS_FSS_TO_BB(mp, 1), 0, &bp);
+                       XFS_FSS_TO_BB(mp, 1), 0, bpp);
        if (error)
                return error;
-       ASSERT(bp && !XFS_BUF_GETERROR(bp));
+
+       ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp));
+       agi = XFS_BUF_TO_AGI(*bpp);
 
        /*
         * Validate the magic number of the agi block.
         */
-       agi = XFS_BUF_TO_AGI(bp);
-       agi_ok =
-               INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC &&
-               XFS_AGI_GOOD_VERSION(
-                       INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+       agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
+               XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
+               be32_to_cpu(agi->agi_seqno) == agno;
        if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
                        XFS_RANDOM_IALLOC_READ_AGI))) {
-               XFS_CORRUPTION_ERROR("xfs_ialloc_read_agi", XFS_ERRLEVEL_LOW,
+               XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
                                     mp, agi);
-               xfs_trans_brelse(tp, bp);
+               xfs_trans_brelse(tp, *bpp);
                return XFS_ERROR(EFSCORRUPTED);
        }
+
+       XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF);
+
+       xfs_check_agi_unlinked(agi);
+       return 0;
+}
+
+int
+xfs_ialloc_read_agi(
+       struct xfs_mount        *mp,    /* file system mount structure */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       struct xfs_buf          **bpp)  /* allocation group hdr buf */
+{
+       struct xfs_agi          *agi;   /* allocation group header */
+       struct xfs_perag        *pag;   /* per allocation group data */
+       int                     error;
+
+       error = xfs_read_agi(mp, tp, agno, bpp);
+       if (error)
+               return error;
+
+       agi = XFS_BUF_TO_AGI(*bpp);
        pag = &mp->m_perag[agno];
+
        if (!pag->pagi_init) {
-               pag->pagi_freecount = INT_GET(agi->agi_freecount, ARCH_CONVERT);
+               pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
+               pag->pagi_count = be32_to_cpu(agi->agi_count);
                pag->pagi_init = 1;
-       } else {
-               /*
-                * It's possible for these to be out of sync if
-                * we are in the middle of a forced shutdown.
-                */
-               ASSERT(pag->pagi_freecount ==
-                               INT_GET(agi->agi_freecount, ARCH_CONVERT)
-                       || XFS_FORCED_SHUTDOWN(mp));
        }
 
-#ifdef DEBUG
-       {
-               int     i;
+       /*
+        * It's possible for these to be out of sync if
+        * we are in the middle of a forced shutdown.
+        */
+       ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
+               XFS_FORCED_SHUTDOWN(mp));
+       return 0;
+}
 
-               for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
-                       ASSERT(agi->agi_unlinked[i]);
-       }
-#endif
+/*
+ * Read in the agi to initialise the per-ag data in the mount structure
+ */
+int
+xfs_ialloc_pagi_init(
+       xfs_mount_t     *mp,            /* file system mount structure */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_agnumber_t  agno)           /* allocation group number */
+{
+       xfs_buf_t       *bp = NULL;
+       int             error;
 
-       XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF);
-       *bpp = bp;
+       error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
+       if (error)
+               return error;
+       if (bp)
+               xfs_trans_brelse(tp, bp);
        return 0;
 }