Merge branch 'bkl/ioctl' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic...
[safe/jmp/linux-2.6] / fs / xfs / xfs_log_recover.c
index 1f0016b..0de08e3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -24,7 +24,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
-#include "xfs_imap.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_log_priv.h"
 #include "xfs_trans_priv.h"
 #include "xfs_quota.h"
 #include "xfs_rw.h"
+#include "xfs_utils.h"
+#include "xfs_trace.h"
 
 STATIC int     xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
 STATIC int     xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
-STATIC void    xlog_recover_insert_item_backq(xlog_recover_item_t **q,
-                                              xlog_recover_item_t *item);
 #if defined(DEBUG)
 STATIC void    xlog_recover_check_summary(xlog_t *);
-STATIC void    xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int);
 #else
 #define        xlog_recover_check_summary(log)
-#define        xlog_recover_check_ail(mp, lip, gen)
 #endif
 
-
 /*
  * Sector aligned buffer routines for buffer create/read/write/access
  */
 
-#define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs)  \
-       ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \
-       ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) )
-#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno)  ((bno) & ~(log)->l_sectbb_mask)
+/*
+ * Verify the given count of basic blocks is valid number of blocks
+ * to specify for an operation involving the given XFS log buffer.
+ * Returns nonzero if the count is valid, 0 otherwise.
+ */
 
-xfs_buf_t *
-xlog_get_bp(
+static inline int
+xlog_buf_bbcount_valid(
        xlog_t          *log,
-       int             num_bblks)
+       int             bbcount)
 {
-       ASSERT(num_bblks > 0);
+       return bbcount > 0 && bbcount <= log->l_logBBsize;
+}
 
-       if (log->l_sectbb_log) {
-               if (num_bblks > 1)
-                       num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
-               num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks);
+/*
+ * Allocate a buffer to hold log data.  The buffer needs to be able
+ * to map to a range of nbblks basic blocks at any valid (basic
+ * block) offset within the log.
+ */
+STATIC xfs_buf_t *
+xlog_get_bp(
+       xlog_t          *log,
+       int             nbblks)
+{
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
+               return NULL;
        }
-       return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp);
+
+       /*
+        * We do log I/O in units of log sectors (a power-of-2
+        * multiple of the basic block size), so we round up the
+        * requested size to acommodate the basic blocks required
+        * for complete log sectors.
+        *
+        * In addition, the buffer may be used for a non-sector-
+        * aligned block offset, in which case an I/O of the
+        * requested size could extend beyond the end of the
+        * buffer.  If the requested size is only 1 basic block it
+        * will never straddle a sector boundary, so this won't be
+        * an issue.  Nor will this be a problem if the log I/O is
+        * done in basic blocks (sector size 1).  But otherwise we
+        * extend the buffer by one extra log sector to ensure
+        * there's space to accomodate this possiblility.
+        */
+       if (nbblks > 1 && log->l_sectBBsize > 1)
+               nbblks += log->l_sectBBsize;
+       nbblks = round_up(nbblks, log->l_sectBBsize);
+
+       return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
 }
 
-void
+STATIC void
 xlog_put_bp(
        xfs_buf_t       *bp)
 {
        xfs_buf_free(bp);
 }
 
+/*
+ * Return the address of the start of the given block number's data
+ * in a log buffer.  The buffer covers a log sector-aligned region.
+ */
+STATIC xfs_caddr_t
+xlog_align(
+       xlog_t          *log,
+       xfs_daddr_t     blk_no,
+       int             nbblks,
+       xfs_buf_t       *bp)
+{
+       xfs_daddr_t     offset;
+       xfs_caddr_t     ptr;
+
+       offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
+       ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
+
+       ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
+
+       return ptr;
+}
+
 
 /*
  * nbblks should be uint, but oh well.  Just want to catch that 32-bit length.
  */
-int
-xlog_bread(
+STATIC int
+xlog_bread_noalign(
        xlog_t          *log,
        xfs_daddr_t     blk_no,
        int             nbblks,
@@ -106,14 +156,18 @@ xlog_bread(
 {
        int             error;
 
-       if (log->l_sectbb_log) {
-               blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
-               nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
+               return EFSCORRUPTED;
        }
 
+       blk_no = round_down(blk_no, log->l_sectBBsize);
+       nbblks = round_up(nbblks, log->l_sectBBsize);
+
        ASSERT(nbblks > 0);
        ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
-       ASSERT(bp);
 
        XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
        XFS_BUF_READ(bp);
@@ -122,12 +176,31 @@ xlog_bread(
        XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
 
        xfsbdstrat(log->l_mp, bp);
-       if ((error = xfs_iowait(bp)))
+       error = xfs_iowait(bp);
+       if (error)
                xfs_ioerror_alert("xlog_bread", log->l_mp,
                                  bp, XFS_BUF_ADDR(bp));
        return error;
 }
 
+STATIC int
+xlog_bread(
+       xlog_t          *log,
+       xfs_daddr_t     blk_no,
+       int             nbblks,
+       xfs_buf_t       *bp,
+       xfs_caddr_t     *offset)
+{
+       int             error;
+
+       error = xlog_bread_noalign(log, blk_no, nbblks, bp);
+       if (error)
+               return error;
+
+       *offset = xlog_align(log, blk_no, nbblks, bp);
+       return 0;
+}
+
 /*
  * Write out the buffer at the given block for the given number of blocks.
  * The buffer is kept locked across the write and is returned locked.
@@ -142,11 +215,16 @@ xlog_bwrite(
 {
        int             error;
 
-       if (log->l_sectbb_log) {
-               blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
-               nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
+               return EFSCORRUPTED;
        }
 
+       blk_no = round_down(blk_no, log->l_sectBBsize);
+       nbblks = round_up(nbblks, log->l_sectBBsize);
+
        ASSERT(nbblks > 0);
        ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
 
@@ -164,24 +242,6 @@ xlog_bwrite(
        return error;
 }
 
-STATIC xfs_caddr_t
-xlog_align(
-       xlog_t          *log,
-       xfs_daddr_t     blk_no,
-       int             nbblks,
-       xfs_buf_t       *bp)
-{
-       xfs_caddr_t     ptr;
-
-       if (!log->l_sectbb_log)
-               return XFS_BUF_PTR(bp);
-
-       ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
-       ASSERT(XFS_BUF_SIZE(bp) >=
-               BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
-       return ptr;
-}
-
 #ifdef DEBUG
 /*
  * dump debug superblock and log record information
@@ -191,16 +251,10 @@ xlog_header_check_dump(
        xfs_mount_t             *mp,
        xlog_rec_header_t       *head)
 {
-       int                     b;
-
-       printk("%s:  SB : uuid = ", __FUNCTION__);
-       for (b = 0; b < 16; b++)
-               printk("%02x",((unsigned char *)&mp->m_sb.sb_uuid)[b]);
-       printk(", fmt = %d\n", XLOG_FMT);
-       printk("    log : uuid = ");
-       for (b = 0; b < 16; b++)
-               printk("%02x",((unsigned char *)&head->h_fs_uuid)[b]);
-       printk(", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT));
+       cmn_err(CE_DEBUG, "%s:  SB : uuid = %pU, fmt = %d\n",
+               __func__, &mp->m_sb.sb_uuid, XLOG_FMT);
+       cmn_err(CE_DEBUG, "    log : uuid = %pU, fmt = %d\n",
+               &head->h_fs_uuid, be32_to_cpu(head->h_fmt));
 }
 #else
 #define xlog_header_check_dump(mp, head)
@@ -214,14 +268,14 @@ xlog_header_check_recover(
        xfs_mount_t             *mp,
        xlog_rec_header_t       *head)
 {
-       ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
+       ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
 
        /*
         * IRIX doesn't write the h_fmt field and leaves it zeroed
         * (XLOG_FMT_UNKNOWN). This stops us from trying to recover
         * a dirty log created in IRIX.
         */
-       if (unlikely(INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT)) {
+       if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) {
                xlog_warn(
        "XFS: dirty log written in incompatible format - can't recover");
                xlog_header_check_dump(mp, head);
@@ -247,7 +301,7 @@ xlog_header_check_mount(
        xfs_mount_t             *mp,
        xlog_rec_header_t       *head)
 {
-       ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
+       ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
 
        if (uuid_is_nil(&head->h_fs_uuid)) {
                /*
@@ -270,21 +324,16 @@ STATIC void
 xlog_recover_iodone(
        struct xfs_buf  *bp)
 {
-       xfs_mount_t     *mp;
-
-       ASSERT(XFS_BUF_FSPRIVATE(bp, void *));
-
        if (XFS_BUF_GETERROR(bp)) {
                /*
                 * We're not going to bother about retrying
                 * this during recovery. One strike!
                 */
-               mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *);
                xfs_ioerror_alert("xlog_recover_iodone",
-                                 mp, bp, XFS_BUF_ADDR(bp));
-               xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
+                                 bp->b_mount, bp, XFS_BUF_ADDR(bp));
+               xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
        }
-       XFS_BUF_SET_FSPRIVATE(bp, NULL);
+       bp->b_mount = NULL;
        XFS_BUF_CLR_IODONE_FUNC(bp);
        xfs_biodone(bp);
 }
@@ -295,7 +344,7 @@ xlog_recover_iodone(
  * Note that the algorithm can not be perfect because the disk will not
  * necessarily be perfect.
  */
-int
+STATIC int
 xlog_find_cycle_start(
        xlog_t          *log,
        xfs_buf_t       *bp,
@@ -305,39 +354,38 @@ xlog_find_cycle_start(
 {
        xfs_caddr_t     offset;
        xfs_daddr_t     mid_blk;
+       xfs_daddr_t     end_blk;
        uint            mid_cycle;
        int             error;
 
-       mid_blk = BLK_AVG(first_blk, *last_blk);
-       while (mid_blk != first_blk && mid_blk != *last_blk) {
-               if ((error = xlog_bread(log, mid_blk, 1, bp)))
+       end_blk = *last_blk;
+       mid_blk = BLK_AVG(first_blk, end_blk);
+       while (mid_blk != first_blk && mid_blk != end_blk) {
+               error = xlog_bread(log, mid_blk, 1, bp, &offset);
+               if (error)
                        return error;
-               offset = xlog_align(log, mid_blk, 1, bp);
-               mid_cycle = GET_CYCLE(offset, ARCH_CONVERT);
-               if (mid_cycle == cycle) {
-                       *last_blk = mid_blk;
-                       /* last_half_cycle == mid_cycle */
-               } else {
-                       first_blk = mid_blk;
-                       /* first_half_cycle == mid_cycle */
-               }
-               mid_blk = BLK_AVG(first_blk, *last_blk);
+               mid_cycle = xlog_get_cycle(offset);
+               if (mid_cycle == cycle)
+                       end_blk = mid_blk;   /* last_half_cycle == mid_cycle */
+               else
+                       first_blk = mid_blk; /* first_half_cycle == mid_cycle */
+               mid_blk = BLK_AVG(first_blk, end_blk);
        }
-       ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
-              (mid_blk == *last_blk && mid_blk-1 == first_blk));
+       ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) ||
+              (mid_blk == end_blk && mid_blk-1 == first_blk));
+
+       *last_blk = end_blk;
 
        return 0;
 }
 
 /*
- * Check that the range of blocks does not contain the cycle number
- * given.  The scan needs to occur from front to back and the ptr into the
- * region must be updated since a later routine will need to perform another
- * test.  If the region is completely good, we end up returning the same
- * last block number.
- *
- * Set blkno to -1 if we encounter no errors.  This is an invalid block number
- * since we don't ever expect logs to get this large.
+ * Check that a range of blocks does not contain stop_on_cycle_no.
+ * Fill in *new_blk with the block offset where such a block is
+ * found, or with -1 (an invalid block number) if there is no such
+ * block in the range.  The scan needs to occur from front to back
+ * and the pointer into the region must be updated since a later
+ * routine will need to perform another test.
  */
 STATIC int
 xlog_find_verify_cycle(
@@ -354,12 +402,16 @@ xlog_find_verify_cycle(
        xfs_caddr_t     buf = NULL;
        int             error = 0;
 
+       /*
+        * Greedily allocate a buffer big enough to handle the full
+        * range of basic blocks we'll be examining.  If that fails,
+        * try a smaller size.  We need to be able to read at least
+        * a log sector, or we're out of luck.
+        */
        bufblks = 1 << ffs(nbblks);
-
        while (!(bp = xlog_get_bp(log, bufblks))) {
-               /* can't get enough memory to do everything in one big buffer */
                bufblks >>= 1;
-               if (bufblks <= log->l_sectbb_log)
+               if (bufblks < log->l_sectBBsize)
                        return ENOMEM;
        }
 
@@ -368,12 +420,12 @@ xlog_find_verify_cycle(
 
                bcount = min(bufblks, (start_blk + nbblks - i));
 
-               if ((error = xlog_bread(log, i, bcount, bp)))
+               error = xlog_bread(log, i, bcount, bp, &buf);
+               if (error)
                        goto out;
 
-               buf = xlog_align(log, i, bcount, bp);
                for (j = 0; j < bcount; j++) {
-                       cycle = GET_CYCLE(buf, ARCH_CONVERT);
+                       cycle = xlog_get_cycle(buf);
                        if (cycle == stop_on_cycle_no) {
                                *new_blk = i+j;
                                goto out;
@@ -425,9 +477,9 @@ xlog_find_verify_log_record(
                        return ENOMEM;
                smallmem = 1;
        } else {
-               if ((error = xlog_bread(log, start_blk, num_blks, bp)))
+               error = xlog_bread(log, start_blk, num_blks, bp, &offset);
+               if (error)
                        goto out;
-               offset = xlog_align(log, start_blk, num_blks, bp);
                offset += ((num_blks - 1) << BBSHIFT);
        }
 
@@ -442,15 +494,14 @@ xlog_find_verify_log_record(
                }
 
                if (smallmem) {
-                       if ((error = xlog_bread(log, i, 1, bp)))
+                       error = xlog_bread(log, i, 1, bp, &offset);
+                       if (error)
                                goto out;
-                       offset = xlog_align(log, i, 1, bp);
                }
 
                head = (xlog_rec_header_t *)offset;
 
-               if (XLOG_HEADER_MAGIC_NUM ==
-                   INT_GET(head->h_magicno, ARCH_CONVERT))
+               if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno))
                        break;
 
                if (!smallmem)
@@ -481,8 +532,8 @@ xlog_find_verify_log_record(
         * reset last_blk.  Only when last_blk points in the middle of a log
         * record do we update last_blk.
         */
-       if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
-               uint    h_size = INT_GET(head->h_size, ARCH_CONVERT);
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+               uint    h_size = be32_to_cpu(head->h_size);
 
                xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
                if (h_size % XLOG_HEADER_CYCLE_SIZE)
@@ -491,8 +542,8 @@ xlog_find_verify_log_record(
                xhdrs = 1;
        }
 
-       if (*last_blk - i + extra_bblks
-                       != BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs)
+       if (*last_blk - i + extra_bblks !=
+           BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
                *last_blk = i;
 
 out:
@@ -549,16 +600,19 @@ xlog_find_head(
        bp = xlog_get_bp(log, 1);
        if (!bp)
                return ENOMEM;
-       if ((error = xlog_bread(log, 0, 1, bp)))
+
+       error = xlog_bread(log, 0, 1, bp, &offset);
+       if (error)
                goto bp_err;
-       offset = xlog_align(log, 0, 1, bp);
-       first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+
+       first_half_cycle = xlog_get_cycle(offset);
 
        last_blk = head_blk = log_bbnum - 1;    /* get cycle # of last block */
-       if ((error = xlog_bread(log, last_blk, 1, bp)))
+       error = xlog_bread(log, last_blk, 1, bp, &offset);
+       if (error)
                goto bp_err;
-       offset = xlog_align(log, last_blk, 1, bp);
-       last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+
+       last_half_cycle = xlog_get_cycle(offset);
        ASSERT(last_half_cycle != 0);
 
        /*
@@ -605,7 +659,7 @@ xlog_find_head(
                 * In this case we want to find the first block with cycle
                 * number matching last_half_cycle.  We expect the log to be
                 * some variation on
-                *        x + 1 ... | x ...
+                *        x + 1 ... | x ... | x
                 * The first block with cycle number x (last_half_cycle) will
                 * be where the new head belongs.  First we do a binary search
                 * for the first occurrence of last_half_cycle.  The binary
@@ -615,11 +669,13 @@ xlog_find_head(
                 * the log, then we look for occurrences of last_half_cycle - 1
                 * at the end of the log.  The cases we're looking for look
                 * like
-                *        x + 1 ... | x | x + 1 | x ...
-                *                               ^ binary search stopped here
+                *                               v binary search stopped here
+                *        x + 1 ... | x | x + 1 | x ... | x
+                *                   ^ but we want to locate this spot
                 * or
-                *        x + 1 ... | x ... | x - 1 | x
                 *        <---------> less than scan distance
+                *        x + 1 ... | x ... | x - 1 | x
+                *                           ^ we want to locate this spot
                 */
                stop_on_cycle = last_half_cycle;
                if ((error = xlog_find_cycle_start(log, bp, first_blk,
@@ -675,16 +731,16 @@ xlog_find_head(
                 * certainly not the head of the log.  By searching for
                 * last_half_cycle-1 we accomplish that.
                 */
-               start_blk = log_bbnum - num_scan_bblks + head_blk;
                ASSERT(head_blk <= INT_MAX &&
-                       (xfs_daddr_t) num_scan_bblks - head_blk >= 0);
+                       (xfs_daddr_t) num_scan_bblks >= head_blk);
+               start_blk = log_bbnum - (num_scan_bblks - head_blk);
                if ((error = xlog_find_verify_cycle(log, start_blk,
                                        num_scan_bblks - (int)head_blk,
                                        (stop_on_cycle - 1), &new_blk)))
                        goto bp_err;
                if (new_blk != -1) {
                        head_blk = new_blk;
-                       goto bad_blk;
+                       goto validate_head;
                }
 
                /*
@@ -702,7 +758,7 @@ xlog_find_head(
                        head_blk = new_blk;
        }
 
- bad_blk:
+validate_head:
        /*
         * Now we need to make sure head_blk is not pointing to a block in
         * the middle of a log record.
@@ -724,7 +780,7 @@ xlog_find_head(
                if ((error = xlog_find_verify_log_record(log, start_blk,
                                                        &head_blk, 0)) == -1) {
                        /* We hit the beginning of the log during our search */
-                       start_blk = log_bbnum - num_scan_bblks + head_blk;
+                       start_blk = log_bbnum - (num_scan_bblks - head_blk);
                        new_blk = log_bbnum;
                        ASSERT(start_blk <= INT_MAX &&
                                (xfs_daddr_t) log_bbnum-start_blk >= 0);
@@ -779,7 +835,7 @@ xlog_find_head(
  * We could speed up search by using current head_blk buffer, but it is not
  * available.
  */
-int
+STATIC int
 xlog_find_tail(
        xlog_t                  *log,
        xfs_daddr_t             *head_blk,
@@ -807,13 +863,14 @@ xlog_find_tail(
        if (!bp)
                return ENOMEM;
        if (*head_blk == 0) {                           /* special case */
-               if ((error = xlog_bread(log, 0, 1, bp)))
-                       goto bread_err;
-               offset = xlog_align(log, 0, 1, bp);
-               if (GET_CYCLE(offset, ARCH_CONVERT) == 0) {
+               error = xlog_bread(log, 0, 1, bp, &offset);
+               if (error)
+                       goto done;
+
+               if (xlog_get_cycle(offset) == 0) {
                        *tail_blk = 0;
                        /* leave all other log inited values alone */
-                       goto exit;
+                       goto done;
                }
        }
 
@@ -822,11 +879,11 @@ xlog_find_tail(
         */
        ASSERT(*head_blk < INT_MAX);
        for (i = (int)(*head_blk) - 1; i >= 0; i--) {
-               if ((error = xlog_bread(log, i, 1, bp)))
-                       goto bread_err;
-               offset = xlog_align(log, i, 1, bp);
-               if (XLOG_HEADER_MAGIC_NUM ==
-                   INT_GET(*(uint *)offset, ARCH_CONVERT)) {
+               error = xlog_bread(log, i, 1, bp, &offset);
+               if (error)
+                       goto done;
+
+               if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
                        found = 1;
                        break;
                }
@@ -839,11 +896,12 @@ xlog_find_tail(
         */
        if (!found) {
                for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
-                       if ((error = xlog_bread(log, i, 1, bp)))
-                               goto bread_err;
-                       offset = xlog_align(log, i, 1, bp);
+                       error = xlog_bread(log, i, 1, bp, &offset);
+                       if (error)
+                               goto done;
+
                        if (XLOG_HEADER_MAGIC_NUM ==
-                           INT_GET(*(uint*)offset, ARCH_CONVERT)) {
+                           be32_to_cpu(*(__be32 *)offset)) {
                                found = 2;
                                break;
                        }
@@ -857,7 +915,7 @@ xlog_find_tail(
 
        /* find blk_no of tail of log */
        rhead = (xlog_rec_header_t *)offset;
-       *tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT));
+       *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 
        /*
         * Reset log values according to the state of the log when we
@@ -871,11 +929,11 @@ xlog_find_tail(
         */
        log->l_prev_block = i;
        log->l_curr_block = (int)*head_blk;
-       log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT);
+       log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
        if (found == 2)
                log->l_curr_cycle++;
-       log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT);
-       log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT);
+       log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn);
+       log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn);
        log->l_grant_reserve_cycle = log->l_curr_cycle;
        log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
        log->l_grant_write_cycle = log->l_curr_cycle;
@@ -892,9 +950,9 @@ xlog_find_tail(
         * unmount record if there is one, so we pass the lsn of the
         * unmount record rather than the block after it.
         */
-       if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
-               int     h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
-               int     h_version = INT_GET(rhead->h_version, ARCH_CONVERT);
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+               int     h_size = be32_to_cpu(rhead->h_size);
+               int     h_version = be32_to_cpu(rhead->h_version);
 
                if ((h_version & XLOG_VERSION_2) &&
                    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
@@ -908,15 +966,15 @@ xlog_find_tail(
                hblks = 1;
        }
        after_umount_blk = (i + hblks + (int)
-               BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize;
+               BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
        tail_lsn = log->l_tail_lsn;
        if (*head_blk == after_umount_blk &&
-           INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) {
+           be32_to_cpu(rhead->h_num_logops) == 1) {
                umount_data_blk = (i + hblks) % log->l_logBBsize;
-               if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
-                       goto bread_err;
-               }
-               offset = xlog_align(log, umount_data_blk, 1, bp);
+               error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+               if (error)
+                       goto done;
+
                op_head = (xlog_op_header_t *)offset;
                if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
                        /*
@@ -924,11 +982,21 @@ xlog_find_tail(
                         * log records will point recovery to after the
                         * current unmount record.
                         */
-                       ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle,
-                                       after_umount_blk);
-                       ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
-                                       after_umount_blk);
+                       log->l_tail_lsn =
+                               xlog_assign_lsn(log->l_curr_cycle,
+                                               after_umount_blk);
+                       log->l_last_sync_lsn =
+                               xlog_assign_lsn(log->l_curr_cycle,
+                                               after_umount_blk);
                        *tail_blk = after_umount_blk;
+
+                       /*
+                        * Note that the unmount was clean. If the unmount
+                        * was not clean, we need to know this to rebuild the
+                        * superblock counters from the perag headers if we
+                        * have a filesystem using non-persistent counters.
+                        */
+                       log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
                }
        }
 
@@ -951,12 +1019,10 @@ xlog_find_tail(
         * But... if the -device- itself is readonly, just skip this.
         * We can't recover this device anyway, so it won't matter.
         */
-       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
+       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
                error = xlog_clear_stale_blocks(log, tail_lsn);
-       }
 
-bread_err:
-exit:
+done:
        xlog_put_bp(bp);
 
        if (error)
@@ -980,7 +1046,7 @@ exit:
  *     -1 => use *blk_no as the first block of the log
  *     >0 => error has occurred
  */
-int
+STATIC int
 xlog_find_zeroed(
        xlog_t          *log,
        xfs_daddr_t     *blk_no)
@@ -992,14 +1058,17 @@ xlog_find_zeroed(
        xfs_daddr_t     num_scan_bblks;
        int             error, log_bbnum = log->l_logBBsize;
 
+       *blk_no = 0;
+
        /* check totally zeroed log */
        bp = xlog_get_bp(log, 1);
        if (!bp)
                return ENOMEM;
-       if ((error = xlog_bread(log, 0, 1, bp)))
+       error = xlog_bread(log, 0, 1, bp, &offset);
+       if (error)
                goto bp_err;
-       offset = xlog_align(log, 0, 1, bp);
-       first_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+
+       first_cycle = xlog_get_cycle(offset);
        if (first_cycle == 0) {         /* completely zeroed log */
                *blk_no = 0;
                xlog_put_bp(bp);
@@ -1007,10 +1076,11 @@ xlog_find_zeroed(
        }
 
        /* check partially zeroed log */
-       if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
+       error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
+       if (error)
                goto bp_err;
-       offset = xlog_align(log, log_bbnum-1, 1, bp);
-       last_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+
+       last_cycle = xlog_get_cycle(offset);
        if (last_cycle != 0) {          /* log completely written to */
                xlog_put_bp(bp);
                return 0;
@@ -1090,13 +1160,13 @@ xlog_add_record(
        xlog_rec_header_t       *recp = (xlog_rec_header_t *)buf;
 
        memset(buf, 0, BBSIZE);
-       INT_SET(recp->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM);
-       INT_SET(recp->h_cycle, ARCH_CONVERT, cycle);
-       INT_SET(recp->h_version, ARCH_CONVERT,
-                       XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1);
-       ASSIGN_ANY_LSN_DISK(recp->h_lsn, cycle, block);
-       ASSIGN_ANY_LSN_DISK(recp->h_tail_lsn, tail_cycle, tail_block);
-       INT_SET(recp->h_fmt, ARCH_CONVERT, XLOG_FMT);
+       recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
+       recp->h_cycle = cpu_to_be32(cycle);
+       recp->h_version = cpu_to_be32(
+                       xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
+       recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
+       recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
+       recp->h_fmt = cpu_to_be32(XLOG_FMT);
        memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t));
 }
 
@@ -1112,16 +1182,22 @@ xlog_write_log_records(
        xfs_caddr_t     offset;
        xfs_buf_t       *bp;
        int             balign, ealign;
-       int             sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
+       int             sectbb = log->l_sectBBsize;
        int             end_block = start_block + blocks;
        int             bufblks;
        int             error = 0;
        int             i, j = 0;
 
+       /*
+        * Greedily allocate a buffer big enough to handle the full
+        * range of basic blocks to be written.  If that fails, try
+        * a smaller size.  We need to be able to write at least a
+        * log sector, or we're out of luck.
+        */
        bufblks = 1 << ffs(blocks);
        while (!(bp = xlog_get_bp(log, bufblks))) {
                bufblks >>= 1;
-               if (bufblks <= log->l_sectbb_log)
+               if (bufblks < sectbb)
                        return ENOMEM;
        }
 
@@ -1129,12 +1205,12 @@ xlog_write_log_records(
         * the buffer in the starting sector not covered by the first
         * write below.
         */
-       balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block);
+       balign = round_down(start_block, sectbb);
        if (balign != start_block) {
-               if ((error = xlog_bread(log, start_block, 1, bp))) {
-                       xlog_put_bp(bp);
-                       return error;
-               }
+               error = xlog_bread_noalign(log, start_block, 1, bp);
+               if (error)
+                       goto out_put_bp;
+
                j = start_block - balign;
        }
 
@@ -1148,14 +1224,22 @@ xlog_write_log_records(
                 * the buffer in the final sector not covered by the write.
                 * If this is the same sector as the above read, skip it.
                 */
-               ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block);
+               ealign = round_down(end_block, sectbb);
                if (j == 0 && (start_block + endcount > ealign)) {
                        offset = XFS_BUF_PTR(bp);
                        balign = BBTOB(ealign - start_block);
-                       XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb));
-                       if ((error = xlog_bread(log, ealign, sectbb, bp)))
+                       error = XFS_BUF_SET_PTR(bp, offset + balign,
+                                               BBTOB(sectbb));
+                       if (error)
+                               break;
+
+                       error = xlog_bread_noalign(log, ealign, sectbb, bp);
+                       if (error)
+                               break;
+
+                       error = XFS_BUF_SET_PTR(bp, offset, bufblks);
+                       if (error)
                                break;
-                       XFS_BUF_SET_PTR(bp, offset, bufblks);
                }
 
                offset = xlog_align(log, start_block, endcount, bp);
@@ -1170,6 +1254,8 @@ xlog_write_log_records(
                start_block += endcount;
                j = 0;
        }
+
+ out_put_bp:
        xlog_put_bp(bp);
        return error;
 }
@@ -1315,40 +1401,50 @@ xlog_clear_stale_blocks(
 
 STATIC xlog_recover_t *
 xlog_recover_find_tid(
-       xlog_recover_t          *q,
+       struct hlist_head       *head,
        xlog_tid_t              tid)
 {
-       xlog_recover_t          *p = q;
+       xlog_recover_t          *trans;
+       struct hlist_node       *n;
 
-       while (p != NULL) {
-               if (p->r_log_tid == tid)
-                   break;
-               p = p->r_next;
+       hlist_for_each_entry(trans, n, head, r_list) {
+               if (trans->r_log_tid == tid)
+                       return trans;
        }
-       return p;
+       return NULL;
 }
 
 STATIC void
-xlog_recover_put_hashq(
-       xlog_recover_t          **q,
-       xlog_recover_t          *trans)
+xlog_recover_new_tid(
+       struct hlist_head       *head,
+       xlog_tid_t              tid,
+       xfs_lsn_t               lsn)
 {
-       trans->r_next = *q;
-       *q = trans;
+       xlog_recover_t          *trans;
+
+       trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
+       trans->r_log_tid   = tid;
+       trans->r_lsn       = lsn;
+       INIT_LIST_HEAD(&trans->r_itemq);
+
+       INIT_HLIST_NODE(&trans->r_list);
+       hlist_add_head(&trans->r_list, head);
 }
 
 STATIC void
 xlog_recover_add_item(
-       xlog_recover_item_t     **itemq)
+       struct list_head        *head)
 {
        xlog_recover_item_t     *item;
 
        item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
-       xlog_recover_insert_item_backq(itemq, item);
+       INIT_LIST_HEAD(&item->ri_list);
+       list_add_tail(&item->ri_list, head);
 }
 
 STATIC int
 xlog_recover_add_to_cont_trans(
+       struct log              *log,
        xlog_recover_t          *trans,
        xfs_caddr_t             dp,
        int                     len)
@@ -1357,8 +1453,7 @@ xlog_recover_add_to_cont_trans(
        xfs_caddr_t             ptr, old_ptr;
        int                     old_len;
 
-       item = trans->r_itemq;
-       if (item == 0) {
+       if (list_empty(&trans->r_itemq)) {
                /* finish copying rest of trans header */
                xlog_recover_add_item(&trans->r_itemq);
                ptr = (xfs_caddr_t) &trans->r_theader +
@@ -1366,7 +1461,8 @@ xlog_recover_add_to_cont_trans(
                memcpy(ptr, dp, len); /* d, s, l */
                return 0;
        }
-       item = item->ri_prev;
+       /* take the tail entry */
+       item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
 
        old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
        old_len = item->ri_buf[item->ri_cnt-1].i_len;
@@ -1375,6 +1471,7 @@ xlog_recover_add_to_cont_trans(
        memcpy(&ptr[old_len], dp, len); /* d, s, l */
        item->ri_buf[item->ri_cnt-1].i_len += len;
        item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+       trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
        return 0;
 }
 
@@ -1393,6 +1490,7 @@ xlog_recover_add_to_cont_trans(
  */
 STATIC int
 xlog_recover_add_to_trans(
+       struct log              *log,
        xlog_recover_t          *trans,
        xfs_caddr_t             dp,
        int                     len)
@@ -1403,9 +1501,14 @@ xlog_recover_add_to_trans(
 
        if (!len)
                return 0;
-       item = trans->r_itemq;
-       if (item == 0) {
-               ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
+       if (list_empty(&trans->r_itemq)) {
+               /* we need to catch log corruptions here */
+               if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
+                       xlog_warn("XFS: xlog_recover_add_to_trans: "
+                                 "bad header magic number");
+                       ASSERT(0);
+                       return XFS_ERROR(EIO);
+               }
                if (len == sizeof(xfs_trans_header_t))
                        xlog_recover_add_item(&trans->r_itemq);
                memcpy(&trans->r_theader, dp, len); /* d, s, l */
@@ -1416,140 +1519,77 @@ xlog_recover_add_to_trans(
        memcpy(ptr, dp, len);
        in_f = (xfs_inode_log_format_t *)ptr;
 
-       if (item->ri_prev->ri_total != 0 &&
-            item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
+       /* take the tail entry */
+       item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+       if (item->ri_total != 0 &&
+            item->ri_total == item->ri_cnt) {
+               /* tail item is in use, get a new one */
                xlog_recover_add_item(&trans->r_itemq);
+               item = list_entry(trans->r_itemq.prev,
+                                       xlog_recover_item_t, ri_list);
        }
-       item = trans->r_itemq;
-       item = item->ri_prev;
 
        if (item->ri_total == 0) {              /* first region to be added */
-               item->ri_total  = in_f->ilf_size;
-               ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
-               item->ri_buf = kmem_zalloc((item->ri_total *
-                                           sizeof(xfs_log_iovec_t)), KM_SLEEP);
+               if (in_f->ilf_size == 0 ||
+                   in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
+                       xlog_warn(
+       "XFS: bad number of regions (%d) in inode log format",
+                                 in_f->ilf_size);
+                       ASSERT(0);
+                       return XFS_ERROR(EIO);
+               }
+
+               item->ri_total = in_f->ilf_size;
+               item->ri_buf =
+                       kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
+                                   KM_SLEEP);
        }
        ASSERT(item->ri_total > item->ri_cnt);
        /* Description region is ri_buf[0] */
        item->ri_buf[item->ri_cnt].i_addr = ptr;
        item->ri_buf[item->ri_cnt].i_len  = len;
        item->ri_cnt++;
+       trace_xfs_log_recover_item_add(log, trans, item, 0);
        return 0;
 }
 
-STATIC void
-xlog_recover_new_tid(
-       xlog_recover_t          **q,
-       xlog_tid_t              tid,
-       xfs_lsn_t               lsn)
-{
-       xlog_recover_t          *trans;
-
-       trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
-       trans->r_log_tid   = tid;
-       trans->r_lsn       = lsn;
-       xlog_recover_put_hashq(q, trans);
-}
-
-STATIC int
-xlog_recover_unlink_tid(
-       xlog_recover_t          **q,
-       xlog_recover_t          *trans)
-{
-       xlog_recover_t          *tp;
-       int                     found = 0;
-
-       ASSERT(trans != 0);
-       if (trans == *q) {
-               *q = (*q)->r_next;
-       } else {
-               tp = *q;
-               while (tp != 0) {
-                       if (tp->r_next == trans) {
-                               found = 1;
-                               break;
-                       }
-                       tp = tp->r_next;
-               }
-               if (!found) {
-                       xlog_warn(
-                            "XFS: xlog_recover_unlink_tid: trans not found");
-                       ASSERT(0);
-                       return XFS_ERROR(EIO);
-               }
-               tp->r_next = tp->r_next->r_next;
-       }
-       return 0;
-}
-
-STATIC void
-xlog_recover_insert_item_backq(
-       xlog_recover_item_t     **q,
-       xlog_recover_item_t     *item)
-{
-       if (*q == 0) {
-               item->ri_prev = item->ri_next = item;
-               *q = item;
-       } else {
-               item->ri_next           = *q;
-               item->ri_prev           = (*q)->ri_prev;
-               (*q)->ri_prev           = item;
-               item->ri_prev->ri_next  = item;
-       }
-}
-
-STATIC void
-xlog_recover_insert_item_frontq(
-       xlog_recover_item_t     **q,
-       xlog_recover_item_t     *item)
-{
-       xlog_recover_insert_item_backq(q, item);
-       *q = item;
-}
-
+/*
+ * Sort the log items in the transaction. Cancelled buffers need
+ * to be put first so they are processed before any items that might
+ * modify the buffers. If they are cancelled, then the modifications
+ * don't need to be replayed.
+ */
 STATIC int
 xlog_recover_reorder_trans(
-       xlog_t                  *log,
-       xlog_recover_t          *trans)
+       struct log              *log,
+       xlog_recover_t          *trans,
+       int                     pass)
 {
-       xlog_recover_item_t     *first_item, *itemq, *itemq_next;
-       xfs_buf_log_format_t    *buf_f;
-       xfs_buf_log_format_v1_t *obuf_f;
-       ushort                  flags = 0;
+       xlog_recover_item_t     *item, *n;
+       LIST_HEAD(sort_list);
 
-       first_item = itemq = trans->r_itemq;
-       trans->r_itemq = NULL;
-       do {
-               itemq_next = itemq->ri_next;
-               buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
-               switch (ITEM_TYPE(itemq)) {
-               case XFS_LI_BUF:
-                       flags = buf_f->blf_flags;
-                       break;
-               case XFS_LI_6_1_BUF:
-               case XFS_LI_5_3_BUF:
-                       obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
-                       flags = obuf_f->blf_flags;
-                       break;
-               }
+       list_splice_init(&trans->r_itemq, &sort_list);
+       list_for_each_entry_safe(item, n, &sort_list, ri_list) {
+               xfs_buf_log_format_t    *buf_f;
+
+               buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
 
-               switch (ITEM_TYPE(itemq)) {
+               switch (ITEM_TYPE(item)) {
                case XFS_LI_BUF:
-               case XFS_LI_6_1_BUF:
-               case XFS_LI_5_3_BUF:
-                       if (!(flags & XFS_BLI_CANCEL)) {
-                               xlog_recover_insert_item_frontq(&trans->r_itemq,
-                                                               itemq);
+                       if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
+                               trace_xfs_log_recover_item_reorder_head(log,
+                                                       trans, item, pass);
+                               list_move(&item->ri_list, &trans->r_itemq);
                                break;
                        }
                case XFS_LI_INODE:
-               case XFS_LI_6_1_INODE:
-               case XFS_LI_5_3_INODE:
                case XFS_LI_DQUOT:
                case XFS_LI_QUOTAOFF:
                case XFS_LI_EFD:
                case XFS_LI_EFI:
-                       xlog_recover_insert_item_backq(&trans->r_itemq, itemq);
+                       trace_xfs_log_recover_item_reorder_tail(log,
+                                                       trans, item, pass);
+                       list_move_tail(&item->ri_list, &trans->r_itemq);
                        break;
                default:
                        xlog_warn(
@@ -1557,8 +1597,8 @@ xlog_recover_reorder_trans(
                        ASSERT(0);
                        return XFS_ERROR(EIO);
                }
-               itemq = itemq_next;
-       } while (first_item != itemq);
+       }
+       ASSERT(list_empty(&sort_list));
        return 0;
 }
 
@@ -1583,7 +1623,6 @@ xlog_recover_do_buffer_pass1(
        xfs_buf_cancel_t        *nextp;
        xfs_buf_cancel_t        *prevp;
        xfs_buf_cancel_t        **bucket;
-       xfs_buf_log_format_v1_t *obuf_f;
        xfs_daddr_t             blkno = 0;
        uint                    len = 0;
        ushort                  flags = 0;
@@ -1594,20 +1633,15 @@ xlog_recover_do_buffer_pass1(
                len = buf_f->blf_len;
                flags = buf_f->blf_flags;
                break;
-       case XFS_LI_6_1_BUF:
-       case XFS_LI_5_3_BUF:
-               obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
-               blkno = (xfs_daddr_t) obuf_f->blf_blkno;
-               len = obuf_f->blf_len;
-               flags = obuf_f->blf_flags;
-               break;
        }
 
        /*
         * If this isn't a cancel buffer item, then just return.
         */
-       if (!(flags & XFS_BLI_CANCEL))
+       if (!(flags & XFS_BLI_CANCEL)) {
+               trace_xfs_log_recover_buf_not_cancel(log, buf_f);
                return;
+       }
 
        /*
         * Insert an xfs_buf_cancel record into the hash table of
@@ -1641,6 +1675,7 @@ xlog_recover_do_buffer_pass1(
        while (nextp != NULL) {
                if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
                        nextp->bc_refcount++;
+                       trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
                        return;
                }
                prevp = nextp;
@@ -1654,6 +1689,7 @@ xlog_recover_do_buffer_pass1(
        bcp->bc_refcount = 1;
        bcp->bc_next = NULL;
        prevp->bc_next = bcp;
+       trace_xfs_log_recover_buf_cancel_add(log, buf_f);
 }
 
 /*
@@ -1724,8 +1760,7 @@ xlog_check_buffer_cancelled(
                                        } else {
                                                prevp->bc_next = bcp->bc_next;
                                        }
-                                       kmem_free(bcp,
-                                                 sizeof(xfs_buf_cancel_t));
+                                       kmem_free(bcp);
                                }
                        }
                        return 1;
@@ -1746,7 +1781,6 @@ xlog_recover_do_buffer_pass2(
        xlog_t                  *log,
        xfs_buf_log_format_t    *buf_f)
 {
-       xfs_buf_log_format_v1_t *obuf_f;
        xfs_daddr_t             blkno = 0;
        ushort                  flags = 0;
        uint                    len = 0;
@@ -1757,13 +1791,6 @@ xlog_recover_do_buffer_pass2(
                flags = buf_f->blf_flags;
                len = buf_f->blf_len;
                break;
-       case XFS_LI_6_1_BUF:
-       case XFS_LI_5_3_BUF:
-               obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
-               blkno = (xfs_daddr_t) obuf_f->blf_blkno;
-               flags = obuf_f->blf_flags;
-               len = (xfs_daddr_t) obuf_f->blf_len;
-               break;
        }
 
        return xlog_check_buffer_cancelled(log, blkno, len, flags);
@@ -1799,21 +1826,16 @@ xlog_recover_do_inode_buffer(
        int                     inodes_per_buf;
        xfs_agino_t             *logged_nextp;
        xfs_agino_t             *buffer_nextp;
-       xfs_buf_log_format_v1_t *obuf_f;
        unsigned int            *data_map = NULL;
        unsigned int            map_size = 0;
 
+       trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
+
        switch (buf_f->blf_type) {
        case XFS_LI_BUF:
                data_map = buf_f->blf_data_map;
                map_size = buf_f->blf_map_size;
                break;
-       case XFS_LI_6_1_BUF:
-       case XFS_LI_5_3_BUF:
-               obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
-               data_map = obuf_f->blf_data_map;
-               map_size = obuf_f->blf_map_size;
-               break;
        }
        /*
         * Set the variables corresponding to the current region to
@@ -1889,7 +1911,7 @@ xlog_recover_do_inode_buffer(
 
                buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
                                              next_unlinked_offset);
-               INT_SET(*buffer_nextp, ARCH_CONVERT, *logged_nextp);
+               *buffer_nextp = *logged_nextp;
        }
 
        return 0;
@@ -1904,7 +1926,7 @@ xlog_recover_do_inode_buffer(
 /*ARGSUSED*/
 STATIC void
 xlog_recover_do_reg_buffer(
-       xfs_mount_t             *mp,
+       struct xfs_mount        *mp,
        xlog_recover_item_t     *item,
        xfs_buf_t               *bp,
        xfs_buf_log_format_t    *buf_f)
@@ -1912,22 +1934,17 @@ xlog_recover_do_reg_buffer(
        int                     i;
        int                     bit;
        int                     nbits;
-       xfs_buf_log_format_v1_t *obuf_f;
        unsigned int            *data_map = NULL;
        unsigned int            map_size = 0;
        int                     error;
 
+       trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
+
        switch (buf_f->blf_type) {
        case XFS_LI_BUF:
                data_map = buf_f->blf_data_map;
                map_size = buf_f->blf_map_size;
                break;
-       case XFS_LI_6_1_BUF:
-       case XFS_LI_5_3_BUF:
-               obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
-               data_map = obuf_f->blf_data_map;
-               map_size = obuf_f->blf_map_size;
-               break;
        }
        bit = 0;
        i = 1;  /* 0 is the buf format structure */
@@ -1937,7 +1954,7 @@ xlog_recover_do_reg_buffer(
                        break;
                nbits = xfs_contig_bits(data_map, map_size, bit);
                ASSERT(nbits > 0);
-               ASSERT(item->ri_buf[i].i_addr != 0);
+               ASSERT(item->ri_buf[i].i_addr != NULL);
                ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0);
                ASSERT(XFS_BUF_COUNT(bp) >=
                       ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT));
@@ -1950,16 +1967,30 @@ xlog_recover_do_reg_buffer(
                error = 0;
                if (buf_f->blf_flags &
                   (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
+                       if (item->ri_buf[i].i_addr == NULL) {
+                               cmn_err(CE_ALERT,
+                                       "XFS: NULL dquot in %s.", __func__);
+                               goto next;
+                       }
+                       if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
+                               cmn_err(CE_ALERT,
+                                       "XFS: dquot too small (%d) in %s.",
+                                       item->ri_buf[i].i_len, __func__);
+                               goto next;
+                       }
                        error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
                                               item->ri_buf[i].i_addr,
                                               -1, 0, XFS_QMOPT_DOWARN,
                                               "dquot_buf_recover");
+                       if (error)
+                               goto next;
                }
-               if (!error)
-                       memcpy(xfs_buf_offset(bp,
-                               (uint)bit << XFS_BLI_SHIFT),    /* dest */
-                               item->ri_buf[i].i_addr,         /* source */
-                               nbits<<XFS_BLI_SHIFT);          /* length */
+
+               memcpy(xfs_buf_offset(bp,
+                       (uint)bit << XFS_BLI_SHIFT),    /* dest */
+                       item->ri_buf[i].i_addr,         /* source */
+                       nbits<<XFS_BLI_SHIFT);          /* length */
+ next:
                i++;
                bit += nbits;
        }
@@ -2107,6 +2138,8 @@ xlog_recover_do_dquot_buffer(
 {
        uint                    type;
 
+       trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
+
        /*
         * Filesystems are required to send in quota flags at mount time.
         */
@@ -2160,7 +2193,6 @@ xlog_recover_do_buffer_trans(
        int                     pass)
 {
        xfs_buf_log_format_t    *buf_f;
-       xfs_buf_log_format_v1_t *obuf_f;
        xfs_mount_t             *mp;
        xfs_buf_t               *bp;
        int                     error;
@@ -2168,6 +2200,7 @@ xlog_recover_do_buffer_trans(
        xfs_daddr_t             blkno;
        int                     len;
        ushort                  flags;
+       uint                    buf_flags;
 
        buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
 
@@ -2188,22 +2221,17 @@ xlog_recover_do_buffer_trans(
                 */
                cancel = xlog_recover_do_buffer_pass2(log, buf_f);
                if (cancel) {
+                       trace_xfs_log_recover_buf_cancel(log, buf_f);
                        return 0;
                }
        }
+       trace_xfs_log_recover_buf_recover(log, buf_f);
        switch (buf_f->blf_type) {
        case XFS_LI_BUF:
                blkno = buf_f->blf_blkno;
                len = buf_f->blf_len;
                flags = buf_f->blf_flags;
                break;
-       case XFS_LI_6_1_BUF:
-       case XFS_LI_5_3_BUF:
-               obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
-               blkno = obuf_f->blf_blkno;
-               len = obuf_f->blf_len;
-               flags = obuf_f->blf_flags;
-               break;
        default:
                xfs_fs_cmn_err(CE_ALERT, log->l_mp,
                        "xfs_log_recover: unknown buffer type 0x%x, logdev %s",
@@ -2215,12 +2243,11 @@ xlog_recover_do_buffer_trans(
        }
 
        mp = log->l_mp;
-       if (flags & XFS_BLI_INODE_BUF) {
-               bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len,
-                                                               XFS_BUF_LOCK);
-       } else {
-               bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0);
-       }
+       buf_flags = XBF_LOCK;
+       if (!(flags & XFS_BLI_INODE_BUF))
+               buf_flags |= XBF_MAPPED;
+
+       bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
        if (XFS_BUF_ISERROR(bp)) {
                xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
                                  bp, blkno);
@@ -2257,15 +2284,14 @@ xlog_recover_do_buffer_trans(
         * overlap with future reads of those inodes.
         */
        if (XFS_DINODE_MAGIC ==
-           INT_GET(*((__uint16_t *)(xfs_buf_offset(bp, 0))), ARCH_CONVERT) &&
+           be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
            (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize,
                        (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) {
                XFS_BUF_STALE(bp);
                error = xfs_bwrite(mp, bp);
        } else {
-               ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
-                      XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
-               XFS_BUF_SET_FSPRIVATE(bp, mp);
+               ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
+               bp->b_mount = mp;
                XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
                xfs_bdwrite(mp, bp);
        }
@@ -2282,7 +2308,6 @@ xlog_recover_do_inode_trans(
        xfs_inode_log_format_t  *in_f;
        xfs_mount_t             *mp;
        xfs_buf_t               *bp;
-       xfs_imap_t              imap;
        xfs_dinode_t            *dip;
        xfs_ino_t               ino;
        int                     len;
@@ -2291,68 +2316,66 @@ xlog_recover_do_inode_trans(
        int                     error;
        int                     attr_index;
        uint                    fields;
-       xfs_dinode_core_t       *dicp;
+       xfs_icdinode_t          *dicp;
+       int                     need_free = 0;
 
        if (pass == XLOG_RECOVER_PASS1) {
                return 0;
        }
 
-       in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr;
-       ino = in_f->ilf_ino;
-       mp = log->l_mp;
-       if (ITEM_TYPE(item) == XFS_LI_INODE) {
-               imap.im_blkno = (xfs_daddr_t)in_f->ilf_blkno;
-               imap.im_len = in_f->ilf_len;
-               imap.im_boffset = in_f->ilf_boffset;
+       if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
+               in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr;
        } else {
-               /*
-                * It's an old inode format record.  We don't know where
-                * its cluster is located on disk, and we can't allow
-                * xfs_imap() to figure it out because the inode btrees
-                * are not ready to be used.  Therefore do not pass the
-                * XFS_IMAP_LOOKUP flag to xfs_imap().  This will give
-                * us only the single block in which the inode lives
-                * rather than its cluster, so we must make sure to
-                * invalidate the buffer when we write it out below.
-                */
-               imap.im_blkno = 0;
-               xfs_imap(log->l_mp, NULL, ino, &imap, 0);
+               in_f = (xfs_inode_log_format_t *)kmem_alloc(
+                       sizeof(xfs_inode_log_format_t), KM_SLEEP);
+               need_free = 1;
+               error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
+               if (error)
+                       goto error;
        }
+       ino = in_f->ilf_ino;
+       mp = log->l_mp;
 
        /*
         * Inode buffers can be freed, look out for it,
         * and do not replay the inode.
         */
-       if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0))
-               return 0;
+       if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
+                                       in_f->ilf_len, 0)) {
+               error = 0;
+               trace_xfs_log_recover_inode_cancel(log, in_f);
+               goto error;
+       }
+       trace_xfs_log_recover_inode_recover(log, in_f);
 
-       bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len,
-                                                               XFS_BUF_LOCK);
+       bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
+                         XBF_LOCK);
        if (XFS_BUF_ISERROR(bp)) {
                xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
-                                 bp, imap.im_blkno);
+                                 bp, in_f->ilf_blkno);
                error = XFS_BUF_GETERROR(bp);
                xfs_buf_relse(bp);
-               return error;
+               goto error;
        }
        error = 0;
        ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
-       dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
+       dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
 
        /*
         * Make sure the place we're flushing out to really looks
         * like an inode!
         */
-       if (unlikely(INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC)) {
+       if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
                xfs_buf_relse(bp);
                xfs_fs_cmn_err(CE_ALERT, mp,
                        "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
                        dip, bp, ino);
                XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)",
                                 XFS_ERRLEVEL_LOW, mp);
-               return XFS_ERROR(EFSCORRUPTED);
+               error = EFSCORRUPTED;
+               goto error;
        }
-       dicp = (xfs_dinode_core_t*)(item->ri_buf[1].i_addr);
+       dicp = (xfs_icdinode_t *)(item->ri_buf[1].i_addr);
        if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
                xfs_buf_relse(bp);
                xfs_fs_cmn_err(CE_ALERT, mp,
@@ -2360,23 +2383,24 @@ xlog_recover_do_inode_trans(
                        item, ino);
                XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)",
                                 XFS_ERRLEVEL_LOW, mp);
-               return XFS_ERROR(EFSCORRUPTED);
+               error = EFSCORRUPTED;
+               goto error;
        }
 
        /* Skip replay when the on disk inode is newer than the log one */
-       if (dicp->di_flushiter <
-           INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT)) {
+       if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
                /*
                 * Deal with the wrap case, DI_MAX_FLUSH is less
                 * than smaller numbers
                 */
-               if ((INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT)
-                                                       == DI_MAX_FLUSH) &&
-                   (dicp->di_flushiter < (DI_MAX_FLUSH>>1))) {
+               if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
+                   dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
                        /* do nothing */
                } else {
                        xfs_buf_relse(bp);
-                       return 0;
+                       trace_xfs_log_recover_inode_skip(log, in_f);
+                       error = 0;
+                       goto error;
                }
        }
        /* Take the opportunity to reset the flush iteration count */
@@ -2391,7 +2415,8 @@ xlog_recover_do_inode_trans(
                        xfs_fs_cmn_err(CE_ALERT, mp,
                                "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
                                item, dip, bp, ino);
-                       return XFS_ERROR(EFSCORRUPTED);
+                       error = EFSCORRUPTED;
+                       goto error;
                }
        } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
                if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -2403,7 +2428,8 @@ xlog_recover_do_inode_trans(
                        xfs_fs_cmn_err(CE_ALERT, mp,
                                "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
                                item, dip, bp, ino);
-                       return XFS_ERROR(EFSCORRUPTED);
+                       error = EFSCORRUPTED;
+                       goto error;
                }
        }
        if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
@@ -2415,7 +2441,8 @@ xlog_recover_do_inode_trans(
                        item, dip, bp, ino,
                        dicp->di_nextents + dicp->di_anextents,
                        dicp->di_nblocks);
-               return XFS_ERROR(EFSCORRUPTED);
+               error = EFSCORRUPTED;
+               goto error;
        }
        if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
                XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)",
@@ -2424,37 +2451,39 @@ xlog_recover_do_inode_trans(
                xfs_fs_cmn_err(CE_ALERT, mp,
                        "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
                        item, dip, bp, ino, dicp->di_forkoff);
-               return XFS_ERROR(EFSCORRUPTED);
+               error = EFSCORRUPTED;
+               goto error;
        }
-       if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) {
+       if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
                XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
                                     XFS_ERRLEVEL_LOW, mp, dicp);
                xfs_buf_relse(bp);
                xfs_fs_cmn_err(CE_ALERT, mp,
                        "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p",
                        item->ri_buf[1].i_len, item);
-               return XFS_ERROR(EFSCORRUPTED);
+               error = EFSCORRUPTED;
+               goto error;
        }
 
        /* The core is in in-core format */
-       xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core,
-                             (xfs_dinode_core_t*)item->ri_buf[1].i_addr, -1);
+       xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr);
 
        /* the rest is in on-disk format */
-       if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) {
-               memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t),
-                       item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t),
-                       item->ri_buf[1].i_len  - sizeof(xfs_dinode_core_t));
+       if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
+               memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode),
+                       item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode),
+                       item->ri_buf[1].i_len  - sizeof(struct xfs_icdinode));
        }
 
        fields = in_f->ilf_fields;
        switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
        case XFS_ILOG_DEV:
-               INT_SET(dip->di_u.di_dev, ARCH_CONVERT, in_f->ilf_u.ilfu_rdev);
-
+               xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
                break;
        case XFS_ILOG_UUID:
-               dip->di_u.di_muuid = in_f->ilf_u.ilfu_uuid;
+               memcpy(XFS_DFORK_DPTR(dip),
+                      &in_f->ilf_u.ilfu_uuid,
+                      sizeof(uuid_t));
                break;
        }
 
@@ -2470,12 +2499,12 @@ xlog_recover_do_inode_trans(
        switch (fields & XFS_ILOG_DFORK) {
        case XFS_ILOG_DDATA:
        case XFS_ILOG_DEXT:
-               memcpy(&dip->di_u, src, len);
+               memcpy(XFS_DFORK_DPTR(dip), src, len);
                break;
 
        case XFS_ILOG_DBROOT:
-               xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len,
-                                &(dip->di_u.di_bmbt),
+               xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
+                                (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
                                 XFS_DFORK_DSIZE(dip, mp));
                break;
 
@@ -2512,8 +2541,8 @@ xlog_recover_do_inode_trans(
 
                case XFS_ILOG_ABROOT:
                        dest = XFS_DFORK_APTR(dip);
-                       xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len,
-                                        (xfs_bmdr_block_t*)dest,
+                       xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
+                                        len, (xfs_bmdr_block_t*)dest,
                                         XFS_DFORK_ASIZE(dip, mp));
                        break;
 
@@ -2521,23 +2550,20 @@ xlog_recover_do_inode_trans(
                        xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag");
                        ASSERT(0);
                        xfs_buf_relse(bp);
-                       return XFS_ERROR(EIO);
+                       error = EIO;
+                       goto error;
                }
        }
 
 write_inode_buffer:
-       if (ITEM_TYPE(item) == XFS_LI_INODE) {
-               ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
-                      XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
-               XFS_BUF_SET_FSPRIVATE(bp, mp);
-               XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
-               xfs_bdwrite(mp, bp);
-       } else {
-               XFS_BUF_STALE(bp);
-               error = xfs_bwrite(mp, bp);
-       }
-
-       return (error);
+       ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
+       bp->b_mount = mp;
+       XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
+       xfs_bdwrite(mp, bp);
+error:
+       if (need_free)
+               kmem_free(in_f);
+       return XFS_ERROR(error);
 }
 
 /*
@@ -2602,12 +2628,23 @@ xlog_recover_do_dquot_trans(
                return (0);
 
        recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
-       ASSERT(recddq);
+
+       if (item->ri_buf[1].i_addr == NULL) {
+               cmn_err(CE_ALERT,
+                       "XFS: NULL dquot in %s.", __func__);
+               return XFS_ERROR(EIO);
+       }
+       if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
+               cmn_err(CE_ALERT,
+                       "XFS: dquot too small (%d) in %s.",
+                       item->ri_buf[1].i_len, __func__);
+               return XFS_ERROR(EIO);
+       }
+
        /*
         * This type of quotas was turned off, so ignore this record.
         */
-       type = INT_GET(recddq->d_flags, ARCH_CONVERT) &
-                       (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
+       type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
        ASSERT(type);
        if (log->l_quotaoffs_flag & type)
                return (0);
@@ -2658,9 +2695,8 @@ xlog_recover_do_dquot_trans(
        memcpy(ddq, recddq, item->ri_buf[1].i_len);
 
        ASSERT(dq_f->qlf_size == 2);
-       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
-              XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
-       XFS_BUF_SET_FSPRIVATE(bp, mp);
+       ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
+       bp->b_mount = mp;
        XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
        xfs_bdwrite(mp, bp);
 
@@ -2674,40 +2710,40 @@ xlog_recover_do_dquot_trans(
  * structure into it, and adds the efi to the AIL with the given
  * LSN.
  */
-STATIC void
+STATIC int
 xlog_recover_do_efi_trans(
        xlog_t                  *log,
        xlog_recover_item_t     *item,
        xfs_lsn_t               lsn,
        int                     pass)
 {
+       int                     error;
        xfs_mount_t             *mp;
        xfs_efi_log_item_t      *efip;
        xfs_efi_log_format_t    *efi_formatp;
-       SPLDECL(s);
 
        if (pass == XLOG_RECOVER_PASS1) {
-               return;
+               return 0;
        }
 
        efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr;
-       ASSERT(item->ri_buf[0].i_len ==
-              (sizeof(xfs_efi_log_format_t) +
-               ((efi_formatp->efi_nextents - 1) * sizeof(xfs_extent_t))));
 
        mp = log->l_mp;
        efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
-       memcpy((char *)&(efip->efi_format), (char *)efi_formatp,
-             sizeof(xfs_efi_log_format_t) +
-             ((efi_formatp->efi_nextents - 1) * sizeof(xfs_extent_t)));
+       if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
+                                        &(efip->efi_format)))) {
+               xfs_efi_item_free(efip);
+               return error;
+       }
        efip->efi_next_extent = efi_formatp->efi_nextents;
        efip->efi_flags |= XFS_EFI_COMMITTED;
 
-       AIL_LOCK(mp,s);
+       spin_lock(&log->l_ailp->xa_lock);
        /*
-        * xfs_trans_update_ail() drops the AIL lock.
+        * xfs_trans_ail_update() drops the AIL lock.
         */
-       xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn, s);
+       xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn);
+       return 0;
 }
 
 
@@ -2725,55 +2761,48 @@ xlog_recover_do_efd_trans(
        xlog_recover_item_t     *item,
        int                     pass)
 {
-       xfs_mount_t             *mp;
        xfs_efd_log_format_t    *efd_formatp;
        xfs_efi_log_item_t      *efip = NULL;
        xfs_log_item_t          *lip;
-       int                     gen;
        __uint64_t              efi_id;
-       SPLDECL(s);
+       struct xfs_ail_cursor   cur;
+       struct xfs_ail          *ailp = log->l_ailp;
 
        if (pass == XLOG_RECOVER_PASS1) {
                return;
        }
 
        efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr;
-       ASSERT(item->ri_buf[0].i_len ==
-              (sizeof(xfs_efd_log_format_t) +
-               ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_t))));
+       ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
+               ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
+              (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
+               ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
        efi_id = efd_formatp->efd_efi_id;
 
        /*
         * Search for the efi with the id in the efd format structure
         * in the AIL.
         */
-       mp = log->l_mp;
-       AIL_LOCK(mp,s);
-       lip = xfs_trans_first_ail(mp, &gen);
+       spin_lock(&ailp->xa_lock);
+       lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
        while (lip != NULL) {
                if (lip->li_type == XFS_LI_EFI) {
                        efip = (xfs_efi_log_item_t *)lip;
                        if (efip->efi_format.efi_id == efi_id) {
                                /*
-                                * xfs_trans_delete_ail() drops the
+                                * xfs_trans_ail_delete() drops the
                                 * AIL lock.
                                 */
-                               xfs_trans_delete_ail(mp, lip, s);
+                               xfs_trans_ail_delete(ailp, lip);
+                               xfs_efi_item_free(efip);
+                               spin_lock(&ailp->xa_lock);
                                break;
                        }
                }
-               lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
-       }
-
-       /*
-        * If we found it, then free it up.  If it wasn't there, it
-        * must have been overwritten in the log.  Oh well.
-        */
-       if (lip != NULL) {
-               xfs_efi_item_free(efip);
-       } else {
-               AIL_UNLOCK(mp, s);
+               lip = xfs_trans_ail_cursor_next(ailp, &cur);
        }
+       xfs_trans_ail_cursor_done(ailp, &cur);
+       spin_unlock(&ailp->xa_lock);
 }
 
 /*
@@ -2789,56 +2818,49 @@ xlog_recover_do_trans(
        int                     pass)
 {
        int                     error = 0;
-       xlog_recover_item_t     *item, *first_item;
+       xlog_recover_item_t     *item;
 
-       if ((error = xlog_recover_reorder_trans(log, trans)))
+       error = xlog_recover_reorder_trans(log, trans, pass);
+       if (error)
                return error;
-       first_item = item = trans->r_itemq;
-       do {
-               /*
-                * we don't need to worry about the block number being
-                * truncated in > 1 TB buffers because in user-land,
-                * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so
-                * the blknos will get through the user-mode buffer
-                * cache properly.  The only bad case is o32 kernels
-                * where xfs_daddr_t is 32-bits but mount will warn us
-                * off a > 1 TB filesystem before we get here.
-                */
-               if ((ITEM_TYPE(item) == XFS_LI_BUF) ||
-                   (ITEM_TYPE(item) == XFS_LI_6_1_BUF) ||
-                   (ITEM_TYPE(item) == XFS_LI_5_3_BUF)) {
-                       if  ((error = xlog_recover_do_buffer_trans(log, item,
-                                                                pass)))
-                               break;
-               } else if ((ITEM_TYPE(item) == XFS_LI_INODE) ||
-                          (ITEM_TYPE(item) == XFS_LI_6_1_INODE) ||
-                          (ITEM_TYPE(item) == XFS_LI_5_3_INODE)) {
-                       if ((error = xlog_recover_do_inode_trans(log, item,
-                                                               pass)))
-                               break;
-               } else if (ITEM_TYPE(item) == XFS_LI_EFI) {
-                       xlog_recover_do_efi_trans(log, item, trans->r_lsn,
-                                                 pass);
-               } else if (ITEM_TYPE(item) == XFS_LI_EFD) {
+
+       list_for_each_entry(item, &trans->r_itemq, ri_list) {
+               trace_xfs_log_recover_item_recover(log, trans, item, pass);
+               switch (ITEM_TYPE(item)) {
+               case XFS_LI_BUF:
+                       error = xlog_recover_do_buffer_trans(log, item, pass);
+                       break;
+               case XFS_LI_INODE:
+                       error = xlog_recover_do_inode_trans(log, item, pass);
+                       break;
+               case XFS_LI_EFI:
+                       error = xlog_recover_do_efi_trans(log, item,
+                                                         trans->r_lsn, pass);
+                       break;
+               case XFS_LI_EFD:
                        xlog_recover_do_efd_trans(log, item, pass);
-               } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) {
-                       if ((error = xlog_recover_do_dquot_trans(log, item,
-                                                                  pass)))
-                                       break;
-               } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) {
-                       if ((error = xlog_recover_do_quotaoff_trans(log, item,
-                                                                  pass)))
-                                       break;
-               } else {
-                       xlog_warn("XFS: xlog_recover_do_trans");
+                       error = 0;
+                       break;
+               case XFS_LI_DQUOT:
+                       error = xlog_recover_do_dquot_trans(log, item, pass);
+                       break;
+               case XFS_LI_QUOTAOFF:
+                       error = xlog_recover_do_quotaoff_trans(log, item,
+                                                              pass);
+                       break;
+               default:
+                       xlog_warn(
+       "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item));
                        ASSERT(0);
                        error = XFS_ERROR(EIO);
                        break;
                }
-               item = item->ri_next;
-       } while (first_item != item);
 
-       return error;
+               if (error)
+                       return error;
+       }
+
+       return 0;
 }
 
 /*
@@ -2850,38 +2872,31 @@ STATIC void
 xlog_recover_free_trans(
        xlog_recover_t          *trans)
 {
-       xlog_recover_item_t     *first_item, *item, *free_item;
+       xlog_recover_item_t     *item, *n;
        int                     i;
 
-       item = first_item = trans->r_itemq;
-       do {
-               free_item = item;
-               item = item->ri_next;
-                /* Free the regions in the item. */
-               for (i = 0; i < free_item->ri_cnt; i++) {
-                       kmem_free(free_item->ri_buf[i].i_addr,
-                                 free_item->ri_buf[i].i_len);
-               }
+       list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
+               /* Free the regions in the item. */
+               list_del(&item->ri_list);
+               for (i = 0; i < item->ri_cnt; i++)
+                       kmem_free(item->ri_buf[i].i_addr);
                /* Free the item itself */
-               kmem_free(free_item->ri_buf,
-                         (free_item->ri_total * sizeof(xfs_log_iovec_t)));
-               kmem_free(free_item, sizeof(xlog_recover_item_t));
-       } while (first_item != item);
+               kmem_free(item->ri_buf);
+               kmem_free(item);
+       }
        /* Free the transaction recover structure */
-       kmem_free(trans, sizeof(xlog_recover_t));
+       kmem_free(trans);
 }
 
 STATIC int
 xlog_recover_commit_trans(
        xlog_t                  *log,
-       xlog_recover_t          **q,
        xlog_recover_t          *trans,
        int                     pass)
 {
        int                     error;
 
-       if ((error = xlog_recover_unlink_tid(q, trans)))
-               return error;
+       hlist_del(&trans->r_list);
        if ((error = xlog_recover_do_trans(log, trans, pass)))
                return error;
        xlog_recover_free_trans(trans);                 /* no error */
@@ -2909,7 +2924,7 @@ xlog_recover_unmount_trans(
 STATIC int
 xlog_recover_process_data(
        xlog_t                  *log,
-       xlog_recover_t          *rhash[],
+       struct hlist_head       rhash[],
        xlog_rec_header_t       *rhead,
        xfs_caddr_t             dp,
        int                     pass)
@@ -2923,8 +2938,8 @@ xlog_recover_process_data(
        unsigned long           hash;
        uint                    flags;
 
-       lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT);
-       num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
+       lp = dp + be32_to_cpu(rhead->h_len);
+       num_logops = be32_to_cpu(rhead->h_num_logops);
 
        /* check the log format matches our own - else we can't recover */
        if (xlog_header_check_recover(log->l_mp, rhead))
@@ -2941,30 +2956,35 @@ xlog_recover_process_data(
                        ASSERT(0);
                        return (XFS_ERROR(EIO));
                }
-               tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
+               tid = be32_to_cpu(ohead->oh_tid);
                hash = XLOG_RHASH(tid);
-               trans = xlog_recover_find_tid(rhash[hash], tid);
+               trans = xlog_recover_find_tid(&rhash[hash], tid);
                if (trans == NULL) {               /* not found; add new tid */
                        if (ohead->oh_flags & XLOG_START_TRANS)
                                xlog_recover_new_tid(&rhash[hash], tid,
-                                       INT_GET(rhead->h_lsn, ARCH_CONVERT));
+                                       be64_to_cpu(rhead->h_lsn));
                } else {
-                       ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
+                       if (dp + be32_to_cpu(ohead->oh_len) > lp) {
+                               xlog_warn(
+                       "XFS: xlog_recover_process_data: bad length");
+                               WARN_ON(1);
+                               return (XFS_ERROR(EIO));
+                       }
                        flags = ohead->oh_flags & ~XLOG_END_TRANS;
                        if (flags & XLOG_WAS_CONT_TRANS)
                                flags &= ~XLOG_CONTINUE_TRANS;
                        switch (flags) {
                        case XLOG_COMMIT_TRANS:
                                error = xlog_recover_commit_trans(log,
-                                               &rhash[hash], trans, pass);
+                                                               trans, pass);
                                break;
                        case XLOG_UNMOUNT_TRANS:
                                error = xlog_recover_unmount_trans(trans);
                                break;
                        case XLOG_WAS_CONT_TRANS:
-                               error = xlog_recover_add_to_cont_trans(trans,
-                                               dp, INT_GET(ohead->oh_len,
-                                                       ARCH_CONVERT));
+                               error = xlog_recover_add_to_cont_trans(log,
+                                               trans, dp,
+                                               be32_to_cpu(ohead->oh_len));
                                break;
                        case XLOG_START_TRANS:
                                xlog_warn(
@@ -2974,9 +2994,8 @@ xlog_recover_process_data(
                                break;
                        case 0:
                        case XLOG_CONTINUE_TRANS:
-                               error = xlog_recover_add_to_trans(trans,
-                                               dp, INT_GET(ohead->oh_len,
-                                                       ARCH_CONVERT));
+                               error = xlog_recover_add_to_trans(log, trans,
+                                               dp, be32_to_cpu(ohead->oh_len));
                                break;
                        default:
                                xlog_warn(
@@ -2988,7 +3007,7 @@ xlog_recover_process_data(
                        if (error)
                                return error;
                }
-               dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
+               dp += be32_to_cpu(ohead->oh_len);
                num_logops--;
        }
        return 0;
@@ -2998,7 +3017,7 @@ xlog_recover_process_data(
  * Process an extent free intent item that was recovered from
  * the log.  We need to free the extents that it describes.
  */
-STATIC void
+STATIC int
 xlog_recover_process_efi(
        xfs_mount_t             *mp,
        xfs_efi_log_item_t      *efip)
@@ -3006,6 +3025,7 @@ xlog_recover_process_efi(
        xfs_efd_log_item_t      *efdp;
        xfs_trans_t             *tp;
        int                     i;
+       int                     error = 0;
        xfs_extent_t            *extp;
        xfs_fsblock_t           startblock_fsb;
 
@@ -3029,51 +3049,33 @@ xlog_recover_process_efi(
                         * free the memory associated with it.
                         */
                        xfs_efi_release(efip, efip->efi_format.efi_nextents);
-                       return;
+                       return XFS_ERROR(EIO);
                }
        }
 
        tp = xfs_trans_alloc(mp, 0);
-       xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
+       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
+       if (error)
+               goto abort_error;
        efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
 
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                extp = &(efip->efi_format.efi_extents[i]);
-               xfs_free_extent(tp, extp->ext_start, extp->ext_len);
+               error = xfs_free_extent(tp, extp->ext_start, extp->ext_len);
+               if (error)
+                       goto abort_error;
                xfs_trans_log_efd_extent(tp, efdp, extp->ext_start,
                                         extp->ext_len);
        }
 
        efip->efi_flags |= XFS_EFI_RECOVERED;
-       xfs_trans_commit(tp, 0, NULL);
-}
-
-/*
- * Verify that once we've encountered something other than an EFI
- * in the AIL that there are no more EFIs in the AIL.
- */
-#if defined(DEBUG)
-STATIC void
-xlog_recover_check_ail(
-       xfs_mount_t             *mp,
-       xfs_log_item_t          *lip,
-       int                     gen)
-{
-       int                     orig_gen = gen;
+       error = xfs_trans_commit(tp, 0);
+       return error;
 
-       do {
-               ASSERT(lip->li_type != XFS_LI_EFI);
-               lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
-               /*
-                * The check will be bogus if we restart from the
-                * beginning of the AIL, so ASSERT that we don't.
-                * We never should since we're holding the AIL lock
-                * the entire time.
-                */
-               ASSERT(gen == orig_gen);
-       } while (lip != NULL);
+abort_error:
+       xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+       return error;
 }
-#endif /* DEBUG */
 
 /*
  * When this is called, all of the EFIs which did not have
@@ -3093,26 +3095,29 @@ xlog_recover_check_ail(
  * everything already in the AIL, we stop processing as soon as
  * we see something other than an EFI in the AIL.
  */
-STATIC void
+STATIC int
 xlog_recover_process_efis(
        xlog_t                  *log)
 {
        xfs_log_item_t          *lip;
        xfs_efi_log_item_t      *efip;
-       int                     gen;
-       xfs_mount_t             *mp;
-       SPLDECL(s);
-
-       mp = log->l_mp;
-       AIL_LOCK(mp,s);
+       int                     error = 0;
+       struct xfs_ail_cursor   cur;
+       struct xfs_ail          *ailp;
 
-       lip = xfs_trans_first_ail(mp, &gen);
+       ailp = log->l_ailp;
+       spin_lock(&ailp->xa_lock);
+       lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
        while (lip != NULL) {
                /*
                 * We're done when we see something other than an EFI.
+                * There should be no EFIs left in the AIL now.
                 */
                if (lip->li_type != XFS_LI_EFI) {
-                       xlog_recover_check_ail(mp, lip, gen);
+#ifdef DEBUG
+                       for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
+                               ASSERT(lip->li_type != XFS_LI_EFI);
+#endif
                        break;
                }
 
@@ -3121,16 +3126,21 @@ xlog_recover_process_efis(
                 */
                efip = (xfs_efi_log_item_t *)lip;
                if (efip->efi_flags & XFS_EFI_RECOVERED) {
-                       lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
+                       lip = xfs_trans_ail_cursor_next(ailp, &cur);
                        continue;
                }
 
-               AIL_UNLOCK(mp, s);
-               xlog_recover_process_efi(mp, efip);
-               AIL_LOCK(mp,s);
-               lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
+               spin_unlock(&ailp->xa_lock);
+               error = xlog_recover_process_efi(log->l_mp, efip);
+               spin_lock(&ailp->xa_lock);
+               if (error)
+                       goto out;
+               lip = xfs_trans_ail_cursor_next(ailp, &cur);
        }
-       AIL_UNLOCK(mp, s);
+out:
+       xfs_trans_ail_cursor_done(ailp, &cur);
+       spin_unlock(&ailp->xa_lock);
+       return error;
 }
 
 /*
@@ -3150,29 +3160,89 @@ xlog_recover_clear_agi_bucket(
        int             error;
 
        tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
-       xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0);
+       error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp),
+                                 0, 0, 0);
+       if (error)
+               goto out_abort;
 
-       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-                                  XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
-                                  XFS_FSS_TO_BB(mp, 1), 0, &agibp);
-       if (error) {
-               xfs_trans_cancel(tp, XFS_TRANS_ABORT);
-               return;
-       }
+       error = xfs_read_agi(mp, tp, agno, &agibp);
+       if (error)
+               goto out_abort;
 
        agi = XFS_BUF_TO_AGI(agibp);
-       if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) {
-               xfs_trans_cancel(tp, XFS_TRANS_ABORT);
-               return;
-       }
-
        agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
        offset = offsetof(xfs_agi_t, agi_unlinked) +
                 (sizeof(xfs_agino_t) * bucket);
        xfs_trans_log_buf(tp, agibp, offset,
                          (offset + sizeof(xfs_agino_t) - 1));
 
-       (void) xfs_trans_commit(tp, 0, NULL);
+       error = xfs_trans_commit(tp, 0);
+       if (error)
+               goto out_error;
+       return;
+
+out_abort:
+       xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+out_error:
+       xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: "
+                       "failed to clear agi %d. Continuing.", agno);
+       return;
+}
+
+STATIC xfs_agino_t
+xlog_recover_process_one_iunlink(
+       struct xfs_mount                *mp,
+       xfs_agnumber_t                  agno,
+       xfs_agino_t                     agino,
+       int                             bucket)
+{
+       struct xfs_buf                  *ibp;
+       struct xfs_dinode               *dip;
+       struct xfs_inode                *ip;
+       xfs_ino_t                       ino;
+       int                             error;
+
+       ino = XFS_AGINO_TO_INO(mp, agno, agino);
+       error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
+       if (error)
+               goto fail;
+
+       /*
+        * Get the on disk inode to find the next inode in the bucket.
+        */
+       error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK);
+       if (error)
+               goto fail_iput;
+
+       ASSERT(ip->i_d.di_nlink == 0);
+       ASSERT(ip->i_d.di_mode != 0);
+
+       /* setup for the next pass */
+       agino = be32_to_cpu(dip->di_next_unlinked);
+       xfs_buf_relse(ibp);
+
+       /*
+        * Prevent any DMAPI event from being sent when the reference on
+        * the inode is dropped.
+        */
+       ip->i_d.di_dmevmask = 0;
+
+       IRELE(ip);
+       return agino;
+
+ fail_iput:
+       IRELE(ip);
+ fail:
+       /*
+        * We can't read in the inode this bucket points to, or this inode
+        * is messed up.  Just ditch this bucket of inodes.  We will lose
+        * some inodes and space, but at least we won't hang.
+        *
+        * Call xlog_recover_clear_agi_bucket() to perform a transaction to
+        * clear the inode pointer in the bucket.
+        */
+       xlog_recover_clear_agi_bucket(mp, agno, bucket);
+       return NULLAGINO;
 }
 
 /*
@@ -3187,7 +3257,7 @@ xlog_recover_clear_agi_bucket(
  * freeing of the inode and its removal from the list must be
  * atomic.
  */
-void
+STATIC void
 xlog_recover_process_iunlinks(
        xlog_t          *log)
 {
@@ -3195,11 +3265,7 @@ xlog_recover_process_iunlinks(
        xfs_agnumber_t  agno;
        xfs_agi_t       *agi;
        xfs_buf_t       *agibp;
-       xfs_buf_t       *ibp;
-       xfs_dinode_t    *dip;
-       xfs_inode_t     *ip;
        xfs_agino_t     agino;
-       xfs_ino_t       ino;
        int             bucket;
        int             error;
        uint            mp_dmevmask;
@@ -3216,22 +3282,21 @@ xlog_recover_process_iunlinks(
                /*
                 * Find the agi for this ag.
                 */
-               agibp = xfs_buf_read(mp->m_ddev_targp,
-                               XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
-                               XFS_FSS_TO_BB(mp, 1), 0);
-               if (XFS_BUF_ISERROR(agibp)) {
-                       xfs_ioerror_alert("xlog_recover_process_iunlinks(#1)",
-                               log->l_mp, agibp,
-                               XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)));
+               error = xfs_read_agi(mp, NULL, agno, &agibp);
+               if (error) {
+                       /*
+                        * AGI is b0rked. Don't process it.
+                        *
+                        * We should probably mark the filesystem as corrupt
+                        * after we've recovered all the ag's we can....
+                        */
+                       continue;
                }
                agi = XFS_BUF_TO_AGI(agibp);
-               ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agi->agi_magicnum));
 
                for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
-
                        agino = be32_to_cpu(agi->agi_unlinked[bucket]);
                        while (agino != NULLAGINO) {
-
                                /*
                                 * Release the agi buffer so that it can
                                 * be acquired in the normal course of the
@@ -3239,86 +3304,17 @@ xlog_recover_process_iunlinks(
                                 */
                                xfs_buf_relse(agibp);
 
-                               ino = XFS_AGINO_TO_INO(mp, agno, agino);
-                               error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
-                               ASSERT(error || (ip != NULL));
-
-                               if (!error) {
-                                       /*
-                                        * Get the on disk inode to find the
-                                        * next inode in the bucket.
-                                        */
-                                       error = xfs_itobp(mp, NULL, ip, &dip,
-                                                       &ibp, 0, 0);
-                                       ASSERT(error || (dip != NULL));
-                               }
-
-                               if (!error) {
-                                       ASSERT(ip->i_d.di_nlink == 0);
-
-                                       /* setup for the next pass */
-                                       agino = INT_GET(dip->di_next_unlinked,
-                                                       ARCH_CONVERT);
-                                       xfs_buf_relse(ibp);
-                                       /*
-                                        * Prevent any DMAPI event from
-                                        * being sent when the
-                                        * reference on the inode is
-                                        * dropped.
-                                        */
-                                       ip->i_d.di_dmevmask = 0;
-
-                                       /*
-                                        * If this is a new inode, handle
-                                        * it specially.  Otherwise,
-                                        * just drop our reference to the
-                                        * inode.  If there are no
-                                        * other references, this will
-                                        * send the inode to
-                                        * xfs_inactive() which will
-                                        * truncate the file and free
-                                        * the inode.
-                                        */
-                                       if (ip->i_d.di_mode == 0)
-                                               xfs_iput_new(ip, 0);
-                                       else
-                                               VN_RELE(XFS_ITOV(ip));
-                               } else {
-                                       /*
-                                        * We can't read in the inode
-                                        * this bucket points to, or
-                                        * this inode is messed up.  Just
-                                        * ditch this bucket of inodes.  We
-                                        * will lose some inodes and space,
-                                        * but at least we won't hang.  Call
-                                        * xlog_recover_clear_agi_bucket()
-                                        * to perform a transaction to clear
-                                        * the inode pointer in the bucket.
-                                        */
-                                       xlog_recover_clear_agi_bucket(mp, agno,
-                                                       bucket);
-
-                                       agino = NULLAGINO;
-                               }
+                               agino = xlog_recover_process_one_iunlink(mp,
+                                                       agno, agino, bucket);
 
                                /*
                                 * Reacquire the agibuffer and continue around
-                                * the loop.
+                                * the loop. This should never fail as we know
+                                * the buffer was good earlier on.
                                 */
-                               agibp = xfs_buf_read(mp->m_ddev_targp,
-                                               XFS_AG_DADDR(mp, agno,
-                                                       XFS_AGI_DADDR(mp)),
-                                               XFS_FSS_TO_BB(mp, 1), 0);
-                               if (XFS_BUF_ISERROR(agibp)) {
-                                       xfs_ioerror_alert(
-                               "xlog_recover_process_iunlinks(#2)",
-                                               log->l_mp, agibp,
-                                               XFS_AG_DADDR(mp, agno,
-                                                       XFS_AGI_DADDR(mp)));
-                               }
+                               error = xfs_read_agi(mp, NULL, agno, &agibp);
+                               ASSERT(error == 0);
                                agi = XFS_BUF_TO_AGI(agibp);
-                               ASSERT(XFS_AGI_MAGIC == be32_to_cpu(
-                                       agi->agi_magicnum));
                        }
                }
 
@@ -3341,16 +3337,16 @@ xlog_pack_data_checksum(
        int             size)
 {
        int             i;
-       uint            *up;
+       __be32          *up;
        uint            chksum = 0;
 
-       up = (uint *)iclog->ic_datap;
+       up = (__be32 *)iclog->ic_datap;
        /* divide length by 4 to get # words */
        for (i = 0; i < (size >> 2); i++) {
-               chksum ^= INT_GET(*up, ARCH_CONVERT);
+               chksum ^= be32_to_cpu(*up);
                up++;
        }
-       INT_SET(iclog->ic_header.h_chksum, ARCH_CONVERT, chksum);
+       iclog->ic_header.h_chksum = cpu_to_be32(chksum);
 }
 #else
 #define xlog_pack_data_checksum(log, iclog, size)
@@ -3367,9 +3363,8 @@ xlog_pack_data(
 {
        int                     i, j, k;
        int                     size = iclog->ic_offset + roundoff;
-       uint                    cycle_lsn;
+       __be32                  cycle_lsn;
        xfs_caddr_t             dp;
-       xlog_in_core_2_t        *xhdr;
 
        xlog_pack_data_checksum(log, iclog, size);
 
@@ -3378,18 +3373,19 @@ xlog_pack_data(
        dp = iclog->ic_datap;
        for (i = 0; i < BTOBB(size) &&
                i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
-               iclog->ic_header.h_cycle_data[i] = *(uint *)dp;
-               *(uint *)dp = cycle_lsn;
+               iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
+               *(__be32 *)dp = cycle_lsn;
                dp += BBSIZE;
        }
 
-       if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
-               xhdr = (xlog_in_core_2_t *)&iclog->ic_header;
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+               xlog_in_core_2_t *xhdr = iclog->ic_data;
+
                for ( ; i < BTOBB(size); i++) {
                        j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
                        k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
-                       xhdr[j].hic_xheader.xh_cycle_data[k] = *(uint *)dp;
-                       *(uint *)dp = cycle_lsn;
+                       xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
+                       *(__be32 *)dp = cycle_lsn;
                        dp += BBSIZE;
                }
 
@@ -3399,42 +3395,6 @@ xlog_pack_data(
        }
 }
 
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-STATIC void
-xlog_unpack_data_checksum(
-       xlog_rec_header_t       *rhead,
-       xfs_caddr_t             dp,
-       xlog_t                  *log)
-{
-       uint                    *up = (uint *)dp;
-       uint                    chksum = 0;
-       int                     i;
-
-       /* divide length by 4 to get # words */
-       for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) {
-               chksum ^= INT_GET(*up, ARCH_CONVERT);
-               up++;
-       }
-       if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) {
-           if (rhead->h_chksum ||
-               ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
-                   cmn_err(CE_DEBUG,
-                       "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)",
-                           INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
-                   cmn_err(CE_DEBUG,
-"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
-                   if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
-                           cmn_err(CE_DEBUG,
-                               "XFS: LogR this is a LogV2 filesystem");
-                   }
-                   log->l_flags |= XLOG_CHKSUM_MISMATCH;
-           }
-       }
-}
-#else
-#define xlog_unpack_data_checksum(rhead, dp, log)
-#endif
-
 STATIC void
 xlog_unpack_data(
        xlog_rec_header_t       *rhead,
@@ -3442,25 +3402,22 @@ xlog_unpack_data(
        xlog_t                  *log)
 {
        int                     i, j, k;
-       xlog_in_core_2_t        *xhdr;
 
-       for (i = 0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
+       for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
                  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
-               *(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
+               *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
                dp += BBSIZE;
        }
 
-       if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
-               xhdr = (xlog_in_core_2_t *)rhead;
-               for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+               xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
+               for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
                        j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
                        k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
-                       *(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
+                       *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
                        dp += BBSIZE;
                }
        }
-
-       xlog_unpack_data_checksum(rhead, dp, log);
 }
 
 STATIC int
@@ -3471,24 +3428,21 @@ xlog_valid_rec_header(
 {
        int                     hlen;
 
-       if (unlikely(
-           (INT_GET(rhead->h_magicno, ARCH_CONVERT) !=
-                       XLOG_HEADER_MAGIC_NUM))) {
+       if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) {
                XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
                                XFS_ERRLEVEL_LOW, log->l_mp);
                return XFS_ERROR(EFSCORRUPTED);
        }
        if (unlikely(
            (!rhead->h_version ||
-           (INT_GET(rhead->h_version, ARCH_CONVERT) &
-                       (~XLOG_VERSION_OKBITS)) != 0))) {
+           (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
                xlog_warn("XFS: %s: unrecognised log version (%d).",
-                       __FUNCTION__, INT_GET(rhead->h_version, ARCH_CONVERT));
+                       __func__, be32_to_cpu(rhead->h_version));
                return XFS_ERROR(EIO);
        }
 
        /* LR body must have data or it wouldn't have been written */
-       hlen = INT_GET(rhead->h_len, ARCH_CONVERT);
+       hlen = be32_to_cpu(rhead->h_len);
        if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
                XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
                                XFS_ERRLEVEL_LOW, log->l_mp);
@@ -3519,12 +3473,12 @@ xlog_do_recovery_pass(
 {
        xlog_rec_header_t       *rhead;
        xfs_daddr_t             blk_no;
-       xfs_caddr_t             bufaddr, offset;
+       xfs_caddr_t             offset;
        xfs_buf_t               *hbp, *dbp;
        int                     error = 0, h_size;
        int                     bblks, split_bblks;
        int                     hblks, split_hblks, wrapped_hblks;
-       xlog_recover_t          *rhash[XLOG_RHASH_SIZE];
+       struct hlist_head       rhash[XLOG_RHASH_SIZE];
 
        ASSERT(head_blk != tail_blk);
 
@@ -3532,7 +3486,7 @@ xlog_do_recovery_pass(
         * Read the header of the tail block and get the iclog buffer size from
         * h_size.  Use this to tell how many sectors make up the log header.
         */
-       if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
                /*
                 * When using variable length iclogs, read first sector of
                 * iclog header and extract the header size from it.  Get a
@@ -3541,16 +3495,17 @@ xlog_do_recovery_pass(
                hbp = xlog_get_bp(log, 1);
                if (!hbp)
                        return ENOMEM;
-               if ((error = xlog_bread(log, tail_blk, 1, hbp)))
+
+               error = xlog_bread(log, tail_blk, 1, hbp, &offset);
+               if (error)
                        goto bread_err1;
-               offset = xlog_align(log, tail_blk, 1, hbp);
+
                rhead = (xlog_rec_header_t *)offset;
                error = xlog_valid_rec_header(log, rhead, tail_blk);
                if (error)
                        goto bread_err1;
-               h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
-               if ((INT_GET(rhead->h_version, ARCH_CONVERT)
-                               & XLOG_VERSION_2) &&
+               h_size = be32_to_cpu(rhead->h_size);
+               if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
                    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
                        hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
                        if (h_size % XLOG_HEADER_CYCLE_SIZE)
@@ -3561,7 +3516,7 @@ xlog_do_recovery_pass(
                        hblks = 1;
                }
        } else {
-               ASSERT(log->l_sectbb_log == 0);
+               ASSERT(log->l_sectBBsize == 1);
                hblks = 1;
                hbp = xlog_get_bp(log, 1);
                h_size = XLOG_BIG_RECORD_BSIZE;
@@ -3578,20 +3533,22 @@ xlog_do_recovery_pass(
        memset(rhash, 0, sizeof(rhash));
        if (tail_blk <= head_blk) {
                for (blk_no = tail_blk; blk_no < head_blk; ) {
-                       if ((error = xlog_bread(log, blk_no, hblks, hbp)))
+                       error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+                       if (error)
                                goto bread_err2;
-                       offset = xlog_align(log, blk_no, hblks, hbp);
+
                        rhead = (xlog_rec_header_t *)offset;
                        error = xlog_valid_rec_header(log, rhead, blk_no);
                        if (error)
                                goto bread_err2;
 
                        /* blocks in data section */
-                       bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
-                       error = xlog_bread(log, blk_no + hblks, bblks, dbp);
+                       bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
+                       error = xlog_bread(log, blk_no + hblks, bblks, dbp,
+                                          &offset);
                        if (error)
                                goto bread_err2;
-                       offset = xlog_align(log, blk_no + hblks, bblks, dbp);
+
                        xlog_unpack_data(rhead, offset, log);
                        if ((error = xlog_recover_process_data(log,
                                                rhash, rhead, offset, pass)))
@@ -3609,15 +3566,15 @@ xlog_do_recovery_pass(
                        /*
                         * Check for header wrapping around physical end-of-log
                         */
-                       offset = NULL;
+                       offset = XFS_BUF_PTR(hbp);
                        split_hblks = 0;
                        wrapped_hblks = 0;
                        if (blk_no + hblks <= log->l_logBBsize) {
                                /* Read header in one read */
-                               error = xlog_bread(log, blk_no, hblks, hbp);
+                               error = xlog_bread(log, blk_no, hblks, hbp,
+                                                  &offset);
                                if (error)
                                        goto bread_err2;
-                               offset = xlog_align(log, blk_no, hblks, hbp);
                        } else {
                                /* This LR is split across physical log end */
                                if (blk_no != log->l_logBBsize) {
@@ -3625,12 +3582,13 @@ xlog_do_recovery_pass(
                                        ASSERT(blk_no <= INT_MAX);
                                        split_hblks = log->l_logBBsize - (int)blk_no;
                                        ASSERT(split_hblks > 0);
-                                       if ((error = xlog_bread(log, blk_no,
-                                                       split_hblks, hbp)))
+                                       error = xlog_bread(log, blk_no,
+                                                          split_hblks, hbp,
+                                                          &offset);
+                                       if (error)
                                                goto bread_err2;
-                                       offset = xlog_align(log, blk_no,
-                                                       split_hblks, hbp);
                                }
+
                                /*
                                 * Note: this black magic still works with
                                 * large sector sizes (non-512) only because:
@@ -3643,18 +3601,22 @@ xlog_do_recovery_pass(
                                 *   _first_, then the log start (LR header end)
                                 *   - order is important.
                                 */
-                               bufaddr = XFS_BUF_PTR(hbp);
-                               XFS_BUF_SET_PTR(hbp,
-                                               bufaddr + BBTOB(split_hblks),
-                                               BBTOB(hblks - split_hblks));
                                wrapped_hblks = hblks - split_hblks;
-                               error = xlog_bread(log, 0, wrapped_hblks, hbp);
+                               error = XFS_BUF_SET_PTR(hbp,
+                                               offset + BBTOB(split_hblks),
+                                               BBTOB(hblks - split_hblks));
+                               if (error)
+                                       goto bread_err2;
+
+                               error = xlog_bread_noalign(log, 0,
+                                                          wrapped_hblks, hbp);
+                               if (error)
+                                       goto bread_err2;
+
+                               error = XFS_BUF_SET_PTR(hbp, offset,
+                                                       BBTOB(hblks));
                                if (error)
                                        goto bread_err2;
-                               XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks));
-                               if (!offset)
-                                       offset = xlog_align(log, 0,
-                                                       wrapped_hblks, hbp);
                        }
                        rhead = (xlog_rec_header_t *)offset;
                        error = xlog_valid_rec_header(log, rhead,
@@ -3662,19 +3624,19 @@ xlog_do_recovery_pass(
                        if (error)
                                goto bread_err2;
 
-                       bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
+                       bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
                        blk_no += hblks;
 
                        /* Read in data for log record */
                        if (blk_no + bblks <= log->l_logBBsize) {
-                               error = xlog_bread(log, blk_no, bblks, dbp);
+                               error = xlog_bread(log, blk_no, bblks, dbp,
+                                                  &offset);
                                if (error)
                                        goto bread_err2;
-                               offset = xlog_align(log, blk_no, bblks, dbp);
                        } else {
                                /* This log record is split across the
                                 * physical end of log */
-                               offset = NULL;
+                               offset = XFS_BUF_PTR(dbp);
                                split_bblks = 0;
                                if (blk_no != log->l_logBBsize) {
                                        /* some data is before the physical
@@ -3684,12 +3646,13 @@ xlog_do_recovery_pass(
                                        split_bblks =
                                                log->l_logBBsize - (int)blk_no;
                                        ASSERT(split_bblks > 0);
-                                       if ((error = xlog_bread(log, blk_no,
-                                                       split_bblks, dbp)))
+                                       error = xlog_bread(log, blk_no,
+                                                       split_bblks, dbp,
+                                                       &offset);
+                                       if (error)
                                                goto bread_err2;
-                                       offset = xlog_align(log, blk_no,
-                                                       split_bblks, dbp);
                                }
+
                                /*
                                 * Note: this black magic still works with
                                 * large sector sizes (non-512) only because:
@@ -3702,17 +3665,21 @@ xlog_do_recovery_pass(
                                 *   _first_, then the log start (LR header end)
                                 *   - order is important.
                                 */
-                               bufaddr = XFS_BUF_PTR(dbp);
-                               XFS_BUF_SET_PTR(dbp,
-                                               bufaddr + BBTOB(split_bblks),
+                               error = XFS_BUF_SET_PTR(dbp,
+                                               offset + BBTOB(split_bblks),
                                                BBTOB(bblks - split_bblks));
-                               if ((error = xlog_bread(log, wrapped_hblks,
-                                               bblks - split_bblks, dbp)))
+                               if (error)
+                                       goto bread_err2;
+
+                               error = xlog_bread_noalign(log, wrapped_hblks,
+                                               bblks - split_bblks,
+                                               dbp);
+                               if (error)
+                                       goto bread_err2;
+
+                               error = XFS_BUF_SET_PTR(dbp, offset, h_size);
+                               if (error)
                                        goto bread_err2;
-                               XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
-                               if (!offset)
-                                       offset = xlog_align(log, wrapped_hblks,
-                                               bblks - split_bblks, dbp);
                        }
                        xlog_unpack_data(rhead, offset, log);
                        if ((error = xlog_recover_process_data(log, rhash,
@@ -3726,17 +3693,21 @@ xlog_do_recovery_pass(
 
                /* read first part of physical log */
                while (blk_no < head_blk) {
-                       if ((error = xlog_bread(log, blk_no, hblks, hbp)))
+                       error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+                       if (error)
                                goto bread_err2;
-                       offset = xlog_align(log, blk_no, hblks, hbp);
+
                        rhead = (xlog_rec_header_t *)offset;
                        error = xlog_valid_rec_header(log, rhead, blk_no);
                        if (error)
                                goto bread_err2;
-                       bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
-                       if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
+
+                       bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
+                       error = xlog_bread(log, blk_no+hblks, bblks, dbp,
+                                          &offset);
+                       if (error)
                                goto bread_err2;
-                       offset = xlog_align(log, blk_no+hblks, bblks, dbp);
+
                        xlog_unpack_data(rhead, offset, log);
                        if ((error = xlog_recover_process_data(log, rhash,
                                                        rhead, offset, pass)))
@@ -3786,8 +3757,7 @@ xlog_do_log_recovery(
        error = xlog_do_recovery_pass(log, head_blk, tail_blk,
                                      XLOG_RECOVER_PASS1);
        if (error != 0) {
-               kmem_free(log->l_buf_cancel_table,
-                         XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
+               kmem_free(log->l_buf_cancel_table);
                log->l_buf_cancel_table = NULL;
                return error;
        }
@@ -3798,7 +3768,7 @@ xlog_do_log_recovery(
        error = xlog_do_recovery_pass(log, head_blk, tail_blk,
                                      XLOG_RECOVER_PASS2);
 #ifdef DEBUG
-       {
+       if (!error) {
                int     i;
 
                for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
@@ -3806,8 +3776,7 @@ xlog_do_log_recovery(
        }
 #endif /* DEBUG */
 
-       kmem_free(log->l_buf_cancel_table,
-                 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
+       kmem_free(log->l_buf_cancel_table);
        log->l_buf_cancel_table = NULL;
 
        return error;
@@ -3860,9 +3829,13 @@ xlog_do_recover(
         */
        bp = xfs_getsb(log->l_mp, 0);
        XFS_BUF_UNDONE(bp);
+       ASSERT(!(XFS_BUF_ISWRITE(bp)));
+       ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
        XFS_BUF_READ(bp);
+       XFS_BUF_UNASYNC(bp);
        xfsbdstrat(log->l_mp, bp);
-       if ((error = xfs_iowait(bp))) {
+       error = xfs_iowait(bp);
+       if (error) {
                xfs_ioerror_alert("xlog_do_recover",
                                  log->l_mp, bp, XFS_BUF_ADDR(bp));
                ASSERT(0);
@@ -3872,11 +3845,14 @@ xlog_do_recover(
 
        /* Convert superblock from on-disk format */
        sbp = &log->l_mp->m_sb;
-       xfs_xlatesb(XFS_BUF_TO_SBP(bp), sbp, 1, XFS_SB_ALL_BITS);
+       xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
        ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
-       ASSERT(XFS_SB_GOOD_VERSION(sbp));
+       ASSERT(xfs_sb_good_version(sbp));
        xfs_buf_relse(bp);
 
+       /* We've re-read the superblock so re-initialize per-cpu counters */
+       xfs_icsb_reinit_counters(log->l_mp);
+
        xlog_recover_check_summary(log);
 
        /* Normal transactions can now occur */
@@ -3912,8 +3888,7 @@ xlog_recover(
                 * under the vfs layer, so we can get away with it unless
                 * the device itself is read-only, in which case we fail.
                 */
-               if ((error = xfs_dev_is_read_only(log->l_mp,
-                                               "recovery required"))) {
+               if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) {
                        return error;
                }
 
@@ -3939,8 +3914,7 @@ xlog_recover(
  */
 int
 xlog_recover_finish(
-       xlog_t          *log,
-       int             mfsi_flags)
+       xlog_t          *log)
 {
        /*
         * Now we're ready to do the transactions needed for the
@@ -3951,19 +3925,23 @@ xlog_recover_finish(
         * rather than accepting new requests.
         */
        if (log->l_flags & XLOG_RECOVERY_NEEDED) {
-               xlog_recover_process_efis(log);
+               int     error;
+               error = xlog_recover_process_efis(log);
+               if (error) {
+                       cmn_err(CE_ALERT,
+                               "Failed to recover EFIs on filesystem: %s",
+                               log->l_mp->m_fsname);
+                       return error;
+               }
                /*
                 * Sync the log to get all the EFIs out of the AIL.
                 * This isn't absolutely necessary, but it helps in
                 * case the unlink transactions would have problems
                 * pushing the EFIs out of the way.
                 */
-               xfs_log_force(log->l_mp, (xfs_lsn_t)0,
-                             (XFS_LOG_FORCE | XFS_LOG_SYNC));
+               xfs_log_force(log->l_mp, XFS_LOG_SYNC);
 
-               if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) {
-                       xlog_recover_process_iunlinks(log);
-               }
+               xlog_recover_process_iunlinks(log);
 
                xlog_recover_check_summary(log);
 
@@ -3974,7 +3952,7 @@ xlog_recover_finish(
                log->l_flags &= ~XLOG_RECOVERY_NEEDED;
        } else {
                cmn_err(CE_DEBUG,
-                       "!Ending clean XFS mount for filesystem: %s",
+                       "!Ending clean XFS mount for filesystem: %s\n",
                        log->l_mp->m_fsname);
        }
        return 0;
@@ -3992,19 +3970,13 @@ xlog_recover_check_summary(
 {
        xfs_mount_t     *mp;
        xfs_agf_t       *agfp;
-       xfs_agi_t       *agip;
        xfs_buf_t       *agfbp;
        xfs_buf_t       *agibp;
-       xfs_daddr_t     agfdaddr;
-       xfs_daddr_t     agidaddr;
-       xfs_buf_t       *sbbp;
-#ifdef XFS_LOUD_RECOVERY
-       xfs_sb_t        *sbp;
-#endif
        xfs_agnumber_t  agno;
        __uint64_t      freeblks;
        __uint64_t      itotal;
        __uint64_t      ifree;
+       int             error;
 
        mp = log->l_mp;
 
@@ -4012,62 +3984,27 @@ xlog_recover_check_summary(
        itotal = 0LL;
        ifree = 0LL;
        for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
-               agfdaddr = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp));
-               agfbp = xfs_buf_read(mp->m_ddev_targp, agfdaddr,
-                               XFS_FSS_TO_BB(mp, 1), 0);
-               if (XFS_BUF_ISERROR(agfbp)) {
-                       xfs_ioerror_alert("xlog_recover_check_summary(agf)",
-                                               mp, agfbp, agfdaddr);
-               }
-               agfp = XFS_BUF_TO_AGF(agfbp);
-               ASSERT(XFS_AGF_MAGIC == be32_to_cpu(agfp->agf_magicnum));
-               ASSERT(XFS_AGF_GOOD_VERSION(be32_to_cpu(agfp->agf_versionnum)));
-               ASSERT(be32_to_cpu(agfp->agf_seqno) == agno);
-
-               freeblks += be32_to_cpu(agfp->agf_freeblks) +
-                           be32_to_cpu(agfp->agf_flcount);
-               xfs_buf_relse(agfbp);
-
-               agidaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
-               agibp = xfs_buf_read(mp->m_ddev_targp, agidaddr,
-                               XFS_FSS_TO_BB(mp, 1), 0);
-               if (XFS_BUF_ISERROR(agibp)) {
-                       xfs_ioerror_alert("xlog_recover_check_summary(agi)",
-                                         mp, agibp, agidaddr);
+               error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
+               if (error) {
+                       xfs_fs_cmn_err(CE_ALERT, mp,
+                                       "xlog_recover_check_summary(agf)"
+                                       "agf read failed agno %d error %d",
+                                                       agno, error);
+               } else {
+                       agfp = XFS_BUF_TO_AGF(agfbp);
+                       freeblks += be32_to_cpu(agfp->agf_freeblks) +
+                                   be32_to_cpu(agfp->agf_flcount);
+                       xfs_buf_relse(agfbp);
                }
-               agip = XFS_BUF_TO_AGI(agibp);
-               ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agip->agi_magicnum));
-               ASSERT(XFS_AGI_GOOD_VERSION(be32_to_cpu(agip->agi_versionnum)));
-               ASSERT(be32_to_cpu(agip->agi_seqno) == agno);
 
-               itotal += be32_to_cpu(agip->agi_count);
-               ifree += be32_to_cpu(agip->agi_freecount);
-               xfs_buf_relse(agibp);
-       }
+               error = xfs_read_agi(mp, NULL, agno, &agibp);
+               if (!error) {
+                       struct xfs_agi  *agi = XFS_BUF_TO_AGI(agibp);
 
-       sbbp = xfs_getsb(mp, 0);
-#ifdef XFS_LOUD_RECOVERY
-       sbp = &mp->m_sb;
-       xfs_xlatesb(XFS_BUF_TO_SBP(sbbp), sbp, 1, XFS_SB_ALL_BITS);
-       cmn_err(CE_NOTE,
-               "xlog_recover_check_summary: sb_icount %Lu itotal %Lu",
-               sbp->sb_icount, itotal);
-       cmn_err(CE_NOTE,
-               "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu",
-               sbp->sb_ifree, ifree);
-       cmn_err(CE_NOTE,
-               "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu",
-               sbp->sb_fdblocks, freeblks);
-#if 0
-       /*
-        * This is turned off until I account for the allocation
-        * btree blocks which live in free space.
-        */
-       ASSERT(sbp->sb_icount == itotal);
-       ASSERT(sbp->sb_ifree == ifree);
-       ASSERT(sbp->sb_fdblocks == freeblks);
-#endif
-#endif
-       xfs_buf_relse(sbbp);
+                       itotal += be32_to_cpu(agi->agi_count);
+                       ifree += be32_to_cpu(agi->agi_freecount);
+                       xfs_buf_relse(agibp);
+               }
+       }
 }
 #endif /* DEBUG */