Merge branch 'bkl/ioctl' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic...
[safe/jmp/linux-2.6] / fs / xfs / xfs_log_recover.c
index abc2034..0de08e3 100644 (file)
 
 STATIC int     xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
 STATIC int     xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
-STATIC void    xlog_recover_insert_item_backq(xlog_recover_item_t **q,
-                                              xlog_recover_item_t *item);
 #if defined(DEBUG)
 STATIC void    xlog_recover_check_summary(xlog_t *);
 #else
 #define        xlog_recover_check_summary(log)
 #endif
 
-
 /*
  * Sector aligned buffer routines for buffer create/read/write/access
  */
 
-#define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs)  \
-       ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \
-       ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) )
-#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno)  ((bno) & ~(log)->l_sectbb_mask)
+/*
+ * Verify the given count of basic blocks is valid number of blocks
+ * to specify for an operation involving the given XFS log buffer.
+ * Returns nonzero if the count is valid, 0 otherwise.
+ */
 
-xfs_buf_t *
+static inline int
+xlog_buf_bbcount_valid(
+       xlog_t          *log,
+       int             bbcount)
+{
+       return bbcount > 0 && bbcount <= log->l_logBBsize;
+}
+
+/*
+ * Allocate a buffer to hold log data.  The buffer needs to be able
+ * to map to a range of nbblks basic blocks at any valid (basic
+ * block) offset within the log.
+ */
+STATIC xfs_buf_t *
 xlog_get_bp(
        xlog_t          *log,
        int             nbblks)
 {
-       if (nbblks <= 0 || nbblks > log->l_logBBsize) {
-               xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks);
-               XFS_ERROR_REPORT("xlog_get_bp(1)",
-                                XFS_ERRLEVEL_HIGH, log->l_mp);
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
                return NULL;
        }
 
-       if (log->l_sectbb_log) {
-               if (nbblks > 1)
-                       nbblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
-               nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
-       }
+       /*
+        * We do log I/O in units of log sectors (a power-of-2
+        * multiple of the basic block size), so we round up the
+        * requested size to acommodate the basic blocks required
+        * for complete log sectors.
+        *
+        * In addition, the buffer may be used for a non-sector-
+        * aligned block offset, in which case an I/O of the
+        * requested size could extend beyond the end of the
+        * buffer.  If the requested size is only 1 basic block it
+        * will never straddle a sector boundary, so this won't be
+        * an issue.  Nor will this be a problem if the log I/O is
+        * done in basic blocks (sector size 1).  But otherwise we
+        * extend the buffer by one extra log sector to ensure
+        * there's space to accomodate this possiblility.
+        */
+       if (nbblks > 1 && log->l_sectBBsize > 1)
+               nbblks += log->l_sectBBsize;
+       nbblks = round_up(nbblks, log->l_sectBBsize);
+
        return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
 }
 
-void
+STATIC void
 xlog_put_bp(
        xfs_buf_t       *bp)
 {
        xfs_buf_free(bp);
 }
 
+/*
+ * Return the address of the start of the given block number's data
+ * in a log buffer.  The buffer covers a log sector-aligned region.
+ */
 STATIC xfs_caddr_t
 xlog_align(
        xlog_t          *log,
@@ -102,14 +132,14 @@ xlog_align(
        int             nbblks,
        xfs_buf_t       *bp)
 {
+       xfs_daddr_t     offset;
        xfs_caddr_t     ptr;
 
-       if (!log->l_sectbb_log)
-               return XFS_BUF_PTR(bp);
+       offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
+       ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
+
+       ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
 
-       ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
-       ASSERT(XFS_BUF_SIZE(bp) >=
-               BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
        return ptr;
 }
 
@@ -126,21 +156,18 @@ xlog_bread_noalign(
 {
        int             error;
 
-       if (nbblks <= 0 || nbblks > log->l_logBBsize) {
-               xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks);
-               XFS_ERROR_REPORT("xlog_bread(1)",
-                                XFS_ERRLEVEL_HIGH, log->l_mp);
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
                return EFSCORRUPTED;
        }
 
-       if (log->l_sectbb_log) {
-               blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
-               nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
-       }
+       blk_no = round_down(blk_no, log->l_sectBBsize);
+       nbblks = round_up(nbblks, log->l_sectBBsize);
 
        ASSERT(nbblks > 0);
        ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
-       ASSERT(bp);
 
        XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
        XFS_BUF_READ(bp);
@@ -188,17 +215,15 @@ xlog_bwrite(
 {
        int             error;
 
-       if (nbblks <= 0 || nbblks > log->l_logBBsize) {
-               xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks);
-               XFS_ERROR_REPORT("xlog_bwrite(1)",
-                                XFS_ERRLEVEL_HIGH, log->l_mp);
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
                return EFSCORRUPTED;
        }
 
-       if (log->l_sectbb_log) {
-               blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
-               nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
-       }
+       blk_no = round_down(blk_no, log->l_sectBBsize);
+       nbblks = round_up(nbblks, log->l_sectBBsize);
 
        ASSERT(nbblks > 0);
        ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
@@ -226,16 +251,10 @@ xlog_header_check_dump(
        xfs_mount_t             *mp,
        xlog_rec_header_t       *head)
 {
-       int                     b;
-
-       cmn_err(CE_DEBUG, "%s:  SB : uuid = ", __func__);
-       for (b = 0; b < 16; b++)
-               cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&mp->m_sb.sb_uuid)[b]);
-       cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
-       cmn_err(CE_DEBUG, "    log : uuid = ");
-       for (b = 0; b < 16; b++)
-               cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&head->h_fs_uuid)[b]);
-       cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt));
+       cmn_err(CE_DEBUG, "%s:  SB : uuid = %pU, fmt = %d\n",
+               __func__, &mp->m_sb.sb_uuid, XLOG_FMT);
+       cmn_err(CE_DEBUG, "    log : uuid = %pU, fmt = %d\n",
+               &head->h_fs_uuid, be32_to_cpu(head->h_fmt));
 }
 #else
 #define xlog_header_check_dump(mp, head)
@@ -335,39 +354,38 @@ xlog_find_cycle_start(
 {
        xfs_caddr_t     offset;
        xfs_daddr_t     mid_blk;
+       xfs_daddr_t     end_blk;
        uint            mid_cycle;
        int             error;
 
-       mid_blk = BLK_AVG(first_blk, *last_blk);
-       while (mid_blk != first_blk && mid_blk != *last_blk) {
+       end_blk = *last_blk;
+       mid_blk = BLK_AVG(first_blk, end_blk);
+       while (mid_blk != first_blk && mid_blk != end_blk) {
                error = xlog_bread(log, mid_blk, 1, bp, &offset);
                if (error)
                        return error;
                mid_cycle = xlog_get_cycle(offset);
-               if (mid_cycle == cycle) {
-                       *last_blk = mid_blk;
-                       /* last_half_cycle == mid_cycle */
-               } else {
-                       first_blk = mid_blk;
-                       /* first_half_cycle == mid_cycle */
-               }
-               mid_blk = BLK_AVG(first_blk, *last_blk);
+               if (mid_cycle == cycle)
+                       end_blk = mid_blk;   /* last_half_cycle == mid_cycle */
+               else
+                       first_blk = mid_blk; /* first_half_cycle == mid_cycle */
+               mid_blk = BLK_AVG(first_blk, end_blk);
        }
-       ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
-              (mid_blk == *last_blk && mid_blk-1 == first_blk));
+       ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) ||
+              (mid_blk == end_blk && mid_blk-1 == first_blk));
+
+       *last_blk = end_blk;
 
        return 0;
 }
 
 /*
- * Check that the range of blocks does not contain the cycle number
- * given.  The scan needs to occur from front to back and the ptr into the
- * region must be updated since a later routine will need to perform another
- * test.  If the region is completely good, we end up returning the same
- * last block number.
- *
- * Set blkno to -1 if we encounter no errors.  This is an invalid block number
- * since we don't ever expect logs to get this large.
+ * Check that a range of blocks does not contain stop_on_cycle_no.
+ * Fill in *new_blk with the block offset where such a block is
+ * found, or with -1 (an invalid block number) if there is no such
+ * block in the range.  The scan needs to occur from front to back
+ * and the pointer into the region must be updated since a later
+ * routine will need to perform another test.
  */
 STATIC int
 xlog_find_verify_cycle(
@@ -384,12 +402,16 @@ xlog_find_verify_cycle(
        xfs_caddr_t     buf = NULL;
        int             error = 0;
 
+       /*
+        * Greedily allocate a buffer big enough to handle the full
+        * range of basic blocks we'll be examining.  If that fails,
+        * try a smaller size.  We need to be able to read at least
+        * a log sector, or we're out of luck.
+        */
        bufblks = 1 << ffs(nbblks);
-
        while (!(bp = xlog_get_bp(log, bufblks))) {
-               /* can't get enough memory to do everything in one big buffer */
                bufblks >>= 1;
-               if (bufblks <= log->l_sectbb_log)
+               if (bufblks < log->l_sectBBsize)
                        return ENOMEM;
        }
 
@@ -637,7 +659,7 @@ xlog_find_head(
                 * In this case we want to find the first block with cycle
                 * number matching last_half_cycle.  We expect the log to be
                 * some variation on
-                *        x + 1 ... | x ...
+                *        x + 1 ... | x ... | x
                 * The first block with cycle number x (last_half_cycle) will
                 * be where the new head belongs.  First we do a binary search
                 * for the first occurrence of last_half_cycle.  The binary
@@ -647,11 +669,13 @@ xlog_find_head(
                 * the log, then we look for occurrences of last_half_cycle - 1
                 * at the end of the log.  The cases we're looking for look
                 * like
-                *        x + 1 ... | x | x + 1 | x ...
-                *                               ^ binary search stopped here
+                *                               v binary search stopped here
+                *        x + 1 ... | x | x + 1 | x ... | x
+                *                   ^ but we want to locate this spot
                 * or
-                *        x + 1 ... | x ... | x - 1 | x
                 *        <---------> less than scan distance
+                *        x + 1 ... | x ... | x - 1 | x
+                *                           ^ we want to locate this spot
                 */
                stop_on_cycle = last_half_cycle;
                if ((error = xlog_find_cycle_start(log, bp, first_blk,
@@ -707,16 +731,16 @@ xlog_find_head(
                 * certainly not the head of the log.  By searching for
                 * last_half_cycle-1 we accomplish that.
                 */
-               start_blk = log_bbnum - num_scan_bblks + head_blk;
                ASSERT(head_blk <= INT_MAX &&
-                       (xfs_daddr_t) num_scan_bblks - head_blk >= 0);
+                       (xfs_daddr_t) num_scan_bblks >= head_blk);
+               start_blk = log_bbnum - (num_scan_bblks - head_blk);
                if ((error = xlog_find_verify_cycle(log, start_blk,
                                        num_scan_bblks - (int)head_blk,
                                        (stop_on_cycle - 1), &new_blk)))
                        goto bp_err;
                if (new_blk != -1) {
                        head_blk = new_blk;
-                       goto bad_blk;
+                       goto validate_head;
                }
 
                /*
@@ -734,7 +758,7 @@ xlog_find_head(
                        head_blk = new_blk;
        }
 
- bad_blk:
+validate_head:
        /*
         * Now we need to make sure head_blk is not pointing to a block in
         * the middle of a log record.
@@ -756,7 +780,7 @@ xlog_find_head(
                if ((error = xlog_find_verify_log_record(log, start_blk,
                                                        &head_blk, 0)) == -1) {
                        /* We hit the beginning of the log during our search */
-                       start_blk = log_bbnum - num_scan_bblks + head_blk;
+                       start_blk = log_bbnum - (num_scan_bblks - head_blk);
                        new_blk = log_bbnum;
                        ASSERT(start_blk <= INT_MAX &&
                                (xfs_daddr_t) log_bbnum-start_blk >= 0);
@@ -811,7 +835,7 @@ xlog_find_head(
  * We could speed up search by using current head_blk buffer, but it is not
  * available.
  */
-int
+STATIC int
 xlog_find_tail(
        xlog_t                  *log,
        xfs_daddr_t             *head_blk,
@@ -841,12 +865,12 @@ xlog_find_tail(
        if (*head_blk == 0) {                           /* special case */
                error = xlog_bread(log, 0, 1, bp, &offset);
                if (error)
-                       goto bread_err;
+                       goto done;
 
                if (xlog_get_cycle(offset) == 0) {
                        *tail_blk = 0;
                        /* leave all other log inited values alone */
-                       goto exit;
+                       goto done;
                }
        }
 
@@ -857,7 +881,7 @@ xlog_find_tail(
        for (i = (int)(*head_blk) - 1; i >= 0; i--) {
                error = xlog_bread(log, i, 1, bp, &offset);
                if (error)
-                       goto bread_err;
+                       goto done;
 
                if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
                        found = 1;
@@ -874,7 +898,7 @@ xlog_find_tail(
                for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
                        error = xlog_bread(log, i, 1, bp, &offset);
                        if (error)
-                               goto bread_err;
+                               goto done;
 
                        if (XLOG_HEADER_MAGIC_NUM ==
                            be32_to_cpu(*(__be32 *)offset)) {
@@ -949,7 +973,7 @@ xlog_find_tail(
                umount_data_blk = (i + hblks) % log->l_logBBsize;
                error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
                if (error)
-                       goto bread_err;
+                       goto done;
 
                op_head = (xlog_op_header_t *)offset;
                if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
@@ -995,12 +1019,10 @@ xlog_find_tail(
         * But... if the -device- itself is readonly, just skip this.
         * We can't recover this device anyway, so it won't matter.
         */
-       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
+       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
                error = xlog_clear_stale_blocks(log, tail_lsn);
-       }
 
-bread_err:
-exit:
+done:
        xlog_put_bp(bp);
 
        if (error)
@@ -1160,16 +1182,22 @@ xlog_write_log_records(
        xfs_caddr_t     offset;
        xfs_buf_t       *bp;
        int             balign, ealign;
-       int             sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
+       int             sectbb = log->l_sectBBsize;
        int             end_block = start_block + blocks;
        int             bufblks;
        int             error = 0;
        int             i, j = 0;
 
+       /*
+        * Greedily allocate a buffer big enough to handle the full
+        * range of basic blocks to be written.  If that fails, try
+        * a smaller size.  We need to be able to write at least a
+        * log sector, or we're out of luck.
+        */
        bufblks = 1 << ffs(blocks);
        while (!(bp = xlog_get_bp(log, bufblks))) {
                bufblks >>= 1;
-               if (bufblks <= log->l_sectbb_log)
+               if (bufblks < sectbb)
                        return ENOMEM;
        }
 
@@ -1177,7 +1205,7 @@ xlog_write_log_records(
         * the buffer in the starting sector not covered by the first
         * write below.
         */
-       balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block);
+       balign = round_down(start_block, sectbb);
        if (balign != start_block) {
                error = xlog_bread_noalign(log, start_block, 1, bp);
                if (error)
@@ -1196,7 +1224,7 @@ xlog_write_log_records(
                 * the buffer in the final sector not covered by the write.
                 * If this is the same sector as the above read, skip it.
                 */
-               ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block);
+               ealign = round_down(end_block, sectbb);
                if (j == 0 && (start_block + endcount > ealign)) {
                        offset = XFS_BUF_PTR(bp);
                        balign = BBTOB(ealign - start_block);
@@ -1373,40 +1401,50 @@ xlog_clear_stale_blocks(
 
 STATIC xlog_recover_t *
 xlog_recover_find_tid(
-       xlog_recover_t          *q,
+       struct hlist_head       *head,
        xlog_tid_t              tid)
 {
-       xlog_recover_t          *p = q;
+       xlog_recover_t          *trans;
+       struct hlist_node       *n;
 
-       while (p != NULL) {
-               if (p->r_log_tid == tid)
-                   break;
-               p = p->r_next;
+       hlist_for_each_entry(trans, n, head, r_list) {
+               if (trans->r_log_tid == tid)
+                       return trans;
        }
-       return p;
+       return NULL;
 }
 
 STATIC void
-xlog_recover_put_hashq(
-       xlog_recover_t          **q,
-       xlog_recover_t          *trans)
+xlog_recover_new_tid(
+       struct hlist_head       *head,
+       xlog_tid_t              tid,
+       xfs_lsn_t               lsn)
 {
-       trans->r_next = *q;
-       *q = trans;
+       xlog_recover_t          *trans;
+
+       trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
+       trans->r_log_tid   = tid;
+       trans->r_lsn       = lsn;
+       INIT_LIST_HEAD(&trans->r_itemq);
+
+       INIT_HLIST_NODE(&trans->r_list);
+       hlist_add_head(&trans->r_list, head);
 }
 
 STATIC void
 xlog_recover_add_item(
-       xlog_recover_item_t     **itemq)
+       struct list_head        *head)
 {
        xlog_recover_item_t     *item;
 
        item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
-       xlog_recover_insert_item_backq(itemq, item);
+       INIT_LIST_HEAD(&item->ri_list);
+       list_add_tail(&item->ri_list, head);
 }
 
 STATIC int
 xlog_recover_add_to_cont_trans(
+       struct log              *log,
        xlog_recover_t          *trans,
        xfs_caddr_t             dp,
        int                     len)
@@ -1415,8 +1453,7 @@ xlog_recover_add_to_cont_trans(
        xfs_caddr_t             ptr, old_ptr;
        int                     old_len;
 
-       item = trans->r_itemq;
-       if (item == NULL) {
+       if (list_empty(&trans->r_itemq)) {
                /* finish copying rest of trans header */
                xlog_recover_add_item(&trans->r_itemq);
                ptr = (xfs_caddr_t) &trans->r_theader +
@@ -1424,7 +1461,8 @@ xlog_recover_add_to_cont_trans(
                memcpy(ptr, dp, len); /* d, s, l */
                return 0;
        }
-       item = item->ri_prev;
+       /* take the tail entry */
+       item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
 
        old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
        old_len = item->ri_buf[item->ri_cnt-1].i_len;
@@ -1433,6 +1471,7 @@ xlog_recover_add_to_cont_trans(
        memcpy(&ptr[old_len], dp, len); /* d, s, l */
        item->ri_buf[item->ri_cnt-1].i_len += len;
        item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+       trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
        return 0;
 }
 
@@ -1451,6 +1490,7 @@ xlog_recover_add_to_cont_trans(
  */
 STATIC int
 xlog_recover_add_to_trans(
+       struct log              *log,
        xlog_recover_t          *trans,
        xfs_caddr_t             dp,
        int                     len)
@@ -1461,8 +1501,7 @@ xlog_recover_add_to_trans(
 
        if (!len)
                return 0;
-       item = trans->r_itemq;
-       if (item == NULL) {
+       if (list_empty(&trans->r_itemq)) {
                /* we need to catch log corruptions here */
                if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
                        xlog_warn("XFS: xlog_recover_add_to_trans: "
@@ -1480,12 +1519,15 @@ xlog_recover_add_to_trans(
        memcpy(ptr, dp, len);
        in_f = (xfs_inode_log_format_t *)ptr;
 
-       if (item->ri_prev->ri_total != 0 &&
-            item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
+       /* take the tail entry */
+       item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+       if (item->ri_total != 0 &&
+            item->ri_total == item->ri_cnt) {
+               /* tail item is in use, get a new one */
                xlog_recover_add_item(&trans->r_itemq);
+               item = list_entry(trans->r_itemq.prev,
+                                       xlog_recover_item_t, ri_list);
        }
-       item = trans->r_itemq;
-       item = item->ri_prev;
 
        if (item->ri_total == 0) {              /* first region to be added */
                if (in_f->ilf_size == 0 ||
@@ -1507,99 +1549,37 @@ xlog_recover_add_to_trans(
        item->ri_buf[item->ri_cnt].i_addr = ptr;
        item->ri_buf[item->ri_cnt].i_len  = len;
        item->ri_cnt++;
+       trace_xfs_log_recover_item_add(log, trans, item, 0);
        return 0;
 }
 
-STATIC void
-xlog_recover_new_tid(
-       xlog_recover_t          **q,
-       xlog_tid_t              tid,
-       xfs_lsn_t               lsn)
-{
-       xlog_recover_t          *trans;
-
-       trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
-       trans->r_log_tid   = tid;
-       trans->r_lsn       = lsn;
-       xlog_recover_put_hashq(q, trans);
-}
-
-STATIC int
-xlog_recover_unlink_tid(
-       xlog_recover_t          **q,
-       xlog_recover_t          *trans)
-{
-       xlog_recover_t          *tp;
-       int                     found = 0;
-
-       ASSERT(trans != NULL);
-       if (trans == *q) {
-               *q = (*q)->r_next;
-       } else {
-               tp = *q;
-               while (tp) {
-                       if (tp->r_next == trans) {
-                               found = 1;
-                               break;
-                       }
-                       tp = tp->r_next;
-               }
-               if (!found) {
-                       xlog_warn(
-                            "XFS: xlog_recover_unlink_tid: trans not found");
-                       ASSERT(0);
-                       return XFS_ERROR(EIO);
-               }
-               tp->r_next = tp->r_next->r_next;
-       }
-       return 0;
-}
-
-STATIC void
-xlog_recover_insert_item_backq(
-       xlog_recover_item_t     **q,
-       xlog_recover_item_t     *item)
-{
-       if (*q == NULL) {
-               item->ri_prev = item->ri_next = item;
-               *q = item;
-       } else {
-               item->ri_next           = *q;
-               item->ri_prev           = (*q)->ri_prev;
-               (*q)->ri_prev           = item;
-               item->ri_prev->ri_next  = item;
-       }
-}
-
-STATIC void
-xlog_recover_insert_item_frontq(
-       xlog_recover_item_t     **q,
-       xlog_recover_item_t     *item)
-{
-       xlog_recover_insert_item_backq(q, item);
-       *q = item;
-}
-
+/*
+ * Sort the log items in the transaction. Cancelled buffers need
+ * to be put first so they are processed before any items that might
+ * modify the buffers. If they are cancelled, then the modifications
+ * don't need to be replayed.
+ */
 STATIC int
 xlog_recover_reorder_trans(
-       xlog_recover_t          *trans)
+       struct log              *log,
+       xlog_recover_t          *trans,
+       int                     pass)
 {
-       xlog_recover_item_t     *first_item, *itemq, *itemq_next;
-       xfs_buf_log_format_t    *buf_f;
-       ushort                  flags = 0;
+       xlog_recover_item_t     *item, *n;
+       LIST_HEAD(sort_list);
+
+       list_splice_init(&trans->r_itemq, &sort_list);
+       list_for_each_entry_safe(item, n, &sort_list, ri_list) {
+               xfs_buf_log_format_t    *buf_f;
 
-       first_item = itemq = trans->r_itemq;
-       trans->r_itemq = NULL;
-       do {
-               itemq_next = itemq->ri_next;
-               buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
+               buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
 
-               switch (ITEM_TYPE(itemq)) {
+               switch (ITEM_TYPE(item)) {
                case XFS_LI_BUF:
-                       flags = buf_f->blf_flags;
-                       if (!(flags & XFS_BLI_CANCEL)) {
-                               xlog_recover_insert_item_frontq(&trans->r_itemq,
-                                                               itemq);
+                       if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
+                               trace_xfs_log_recover_item_reorder_head(log,
+                                                       trans, item, pass);
+                               list_move(&item->ri_list, &trans->r_itemq);
                                break;
                        }
                case XFS_LI_INODE:
@@ -1607,7 +1587,9 @@ xlog_recover_reorder_trans(
                case XFS_LI_QUOTAOFF:
                case XFS_LI_EFD:
                case XFS_LI_EFI:
-                       xlog_recover_insert_item_backq(&trans->r_itemq, itemq);
+                       trace_xfs_log_recover_item_reorder_tail(log,
+                                                       trans, item, pass);
+                       list_move_tail(&item->ri_list, &trans->r_itemq);
                        break;
                default:
                        xlog_warn(
@@ -1615,8 +1597,8 @@ xlog_recover_reorder_trans(
                        ASSERT(0);
                        return XFS_ERROR(EIO);
                }
-               itemq = itemq_next;
-       } while (first_item != itemq);
+       }
+       ASSERT(list_empty(&sort_list));
        return 0;
 }
 
@@ -1656,8 +1638,10 @@ xlog_recover_do_buffer_pass1(
        /*
         * If this isn't a cancel buffer item, then just return.
         */
-       if (!(flags & XFS_BLI_CANCEL))
+       if (!(flags & XFS_BLI_CANCEL)) {
+               trace_xfs_log_recover_buf_not_cancel(log, buf_f);
                return;
+       }
 
        /*
         * Insert an xfs_buf_cancel record into the hash table of
@@ -1691,6 +1675,7 @@ xlog_recover_do_buffer_pass1(
        while (nextp != NULL) {
                if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
                        nextp->bc_refcount++;
+                       trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
                        return;
                }
                prevp = nextp;
@@ -1704,6 +1689,7 @@ xlog_recover_do_buffer_pass1(
        bcp->bc_refcount = 1;
        bcp->bc_next = NULL;
        prevp->bc_next = bcp;
+       trace_xfs_log_recover_buf_cancel_add(log, buf_f);
 }
 
 /*
@@ -1843,6 +1829,8 @@ xlog_recover_do_inode_buffer(
        unsigned int            *data_map = NULL;
        unsigned int            map_size = 0;
 
+       trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
+
        switch (buf_f->blf_type) {
        case XFS_LI_BUF:
                data_map = buf_f->blf_data_map;
@@ -1938,6 +1926,7 @@ xlog_recover_do_inode_buffer(
 /*ARGSUSED*/
 STATIC void
 xlog_recover_do_reg_buffer(
+       struct xfs_mount        *mp,
        xlog_recover_item_t     *item,
        xfs_buf_t               *bp,
        xfs_buf_log_format_t    *buf_f)
@@ -1949,6 +1938,8 @@ xlog_recover_do_reg_buffer(
        unsigned int            map_size = 0;
        int                     error;
 
+       trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
+
        switch (buf_f->blf_type) {
        case XFS_LI_BUF:
                data_map = buf_f->blf_data_map;
@@ -2147,6 +2138,8 @@ xlog_recover_do_dquot_buffer(
 {
        uint                    type;
 
+       trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
+
        /*
         * Filesystems are required to send in quota flags at mount time.
         */
@@ -2167,7 +2160,7 @@ xlog_recover_do_dquot_buffer(
        if (log->l_quotaoffs_flag & type)
                return;
 
-       xlog_recover_do_reg_buffer(item, bp, buf_f);
+       xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 }
 
 /*
@@ -2228,9 +2221,11 @@ xlog_recover_do_buffer_trans(
                 */
                cancel = xlog_recover_do_buffer_pass2(log, buf_f);
                if (cancel) {
+                       trace_xfs_log_recover_buf_cancel(log, buf_f);
                        return 0;
                }
        }
+       trace_xfs_log_recover_buf_recover(log, buf_f);
        switch (buf_f->blf_type) {
        case XFS_LI_BUF:
                blkno = buf_f->blf_blkno;
@@ -2248,9 +2243,9 @@ xlog_recover_do_buffer_trans(
        }
 
        mp = log->l_mp;
-       buf_flags = XFS_BUF_LOCK;
+       buf_flags = XBF_LOCK;
        if (!(flags & XFS_BLI_INODE_BUF))
-               buf_flags |= XFS_BUF_MAPPED;
+               buf_flags |= XBF_MAPPED;
 
        bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
        if (XFS_BUF_ISERROR(bp)) {
@@ -2268,7 +2263,7 @@ xlog_recover_do_buffer_trans(
                  (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
                xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
        } else {
-               xlog_recover_do_reg_buffer(item, bp, buf_f);
+               xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
        }
        if (error)
                return XFS_ERROR(error);
@@ -2348,11 +2343,13 @@ xlog_recover_do_inode_trans(
        if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
                                        in_f->ilf_len, 0)) {
                error = 0;
+               trace_xfs_log_recover_inode_cancel(log, in_f);
                goto error;
        }
+       trace_xfs_log_recover_inode_recover(log, in_f);
 
        bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
-                         XFS_BUF_LOCK);
+                         XBF_LOCK);
        if (XFS_BUF_ISERROR(bp)) {
                xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
                                  bp, in_f->ilf_blkno);
@@ -2401,6 +2398,7 @@ xlog_recover_do_inode_trans(
                        /* do nothing */
                } else {
                        xfs_buf_relse(bp);
+                       trace_xfs_log_recover_inode_skip(log, in_f);
                        error = 0;
                        goto error;
                }
@@ -2820,14 +2818,14 @@ xlog_recover_do_trans(
        int                     pass)
 {
        int                     error = 0;
-       xlog_recover_item_t     *item, *first_item;
+       xlog_recover_item_t     *item;
 
-       error = xlog_recover_reorder_trans(trans);
+       error = xlog_recover_reorder_trans(log, trans, pass);
        if (error)
                return error;
 
-       first_item = item = trans->r_itemq;
-       do {
+       list_for_each_entry(item, &trans->r_itemq, ri_list) {
+               trace_xfs_log_recover_item_recover(log, trans, item, pass);
                switch (ITEM_TYPE(item)) {
                case XFS_LI_BUF:
                        error = xlog_recover_do_buffer_trans(log, item, pass);
@@ -2860,8 +2858,7 @@ xlog_recover_do_trans(
 
                if (error)
                        return error;
-               item = item->ri_next;
-       } while (first_item != item);
+       }
 
        return 0;
 }
@@ -2875,21 +2872,18 @@ STATIC void
 xlog_recover_free_trans(
        xlog_recover_t          *trans)
 {
-       xlog_recover_item_t     *first_item, *item, *free_item;
+       xlog_recover_item_t     *item, *n;
        int                     i;
 
-       item = first_item = trans->r_itemq;
-       do {
-               free_item = item;
-               item = item->ri_next;
-                /* Free the regions in the item. */
-               for (i = 0; i < free_item->ri_cnt; i++) {
-                       kmem_free(free_item->ri_buf[i].i_addr);
-               }
+       list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
+               /* Free the regions in the item. */
+               list_del(&item->ri_list);
+               for (i = 0; i < item->ri_cnt; i++)
+                       kmem_free(item->ri_buf[i].i_addr);
                /* Free the item itself */
-               kmem_free(free_item->ri_buf);
-               kmem_free(free_item);
-       } while (first_item != item);
+               kmem_free(item->ri_buf);
+               kmem_free(item);
+       }
        /* Free the transaction recover structure */
        kmem_free(trans);
 }
@@ -2897,14 +2891,12 @@ xlog_recover_free_trans(
 STATIC int
 xlog_recover_commit_trans(
        xlog_t                  *log,
-       xlog_recover_t          **q,
        xlog_recover_t          *trans,
        int                     pass)
 {
        int                     error;
 
-       if ((error = xlog_recover_unlink_tid(q, trans)))
-               return error;
+       hlist_del(&trans->r_list);
        if ((error = xlog_recover_do_trans(log, trans, pass)))
                return error;
        xlog_recover_free_trans(trans);                 /* no error */
@@ -2932,7 +2924,7 @@ xlog_recover_unmount_trans(
 STATIC int
 xlog_recover_process_data(
        xlog_t                  *log,
-       xlog_recover_t          *rhash[],
+       struct hlist_head       rhash[],
        xlog_rec_header_t       *rhead,
        xfs_caddr_t             dp,
        int                     pass)
@@ -2966,7 +2958,7 @@ xlog_recover_process_data(
                }
                tid = be32_to_cpu(ohead->oh_tid);
                hash = XLOG_RHASH(tid);
-               trans = xlog_recover_find_tid(rhash[hash], tid);
+               trans = xlog_recover_find_tid(&rhash[hash], tid);
                if (trans == NULL) {               /* not found; add new tid */
                        if (ohead->oh_flags & XLOG_START_TRANS)
                                xlog_recover_new_tid(&rhash[hash], tid,
@@ -2984,14 +2976,15 @@ xlog_recover_process_data(
                        switch (flags) {
                        case XLOG_COMMIT_TRANS:
                                error = xlog_recover_commit_trans(log,
-                                               &rhash[hash], trans, pass);
+                                                               trans, pass);
                                break;
                        case XLOG_UNMOUNT_TRANS:
                                error = xlog_recover_unmount_trans(trans);
                                break;
                        case XLOG_WAS_CONT_TRANS:
-                               error = xlog_recover_add_to_cont_trans(trans,
-                                               dp, be32_to_cpu(ohead->oh_len));
+                               error = xlog_recover_add_to_cont_trans(log,
+                                               trans, dp,
+                                               be32_to_cpu(ohead->oh_len));
                                break;
                        case XLOG_START_TRANS:
                                xlog_warn(
@@ -3001,7 +2994,7 @@ xlog_recover_process_data(
                                break;
                        case 0:
                        case XLOG_CONTINUE_TRANS:
-                               error = xlog_recover_add_to_trans(trans,
+                               error = xlog_recover_add_to_trans(log, trans,
                                                dp, be32_to_cpu(ohead->oh_len));
                                break;
                        default:
@@ -3217,7 +3210,7 @@ xlog_recover_process_one_iunlink(
        /*
         * Get the on disk inode to find the next inode in the bucket.
         */
-       error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK);
+       error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK);
        if (error)
                goto fail_iput;
 
@@ -3402,42 +3395,6 @@ xlog_pack_data(
        }
 }
 
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-STATIC void
-xlog_unpack_data_checksum(
-       xlog_rec_header_t       *rhead,
-       xfs_caddr_t             dp,
-       xlog_t                  *log)
-{
-       __be32                  *up = (__be32 *)dp;
-       uint                    chksum = 0;
-       int                     i;
-
-       /* divide length by 4 to get # words */
-       for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) {
-               chksum ^= be32_to_cpu(*up);
-               up++;
-       }
-       if (chksum != be32_to_cpu(rhead->h_chksum)) {
-           if (rhead->h_chksum ||
-               ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
-                   cmn_err(CE_DEBUG,
-                       "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
-                           be32_to_cpu(rhead->h_chksum), chksum);
-                   cmn_err(CE_DEBUG,
-"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
-                   if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-                           cmn_err(CE_DEBUG,
-                               "XFS: LogR this is a LogV2 filesystem\n");
-                   }
-                   log->l_flags |= XLOG_CHKSUM_MISMATCH;
-           }
-       }
-}
-#else
-#define xlog_unpack_data_checksum(rhead, dp, log)
-#endif
-
 STATIC void
 xlog_unpack_data(
        xlog_rec_header_t       *rhead,
@@ -3461,8 +3418,6 @@ xlog_unpack_data(
                        dp += BBSIZE;
                }
        }
-
-       xlog_unpack_data_checksum(rhead, dp, log);
 }
 
 STATIC int
@@ -3523,7 +3478,7 @@ xlog_do_recovery_pass(
        int                     error = 0, h_size;
        int                     bblks, split_bblks;
        int                     hblks, split_hblks, wrapped_hblks;
-       xlog_recover_t          *rhash[XLOG_RHASH_SIZE];
+       struct hlist_head       rhash[XLOG_RHASH_SIZE];
 
        ASSERT(head_blk != tail_blk);
 
@@ -3561,7 +3516,7 @@ xlog_do_recovery_pass(
                        hblks = 1;
                }
        } else {
-               ASSERT(log->l_sectbb_log == 0);
+               ASSERT(log->l_sectBBsize == 1);
                hblks = 1;
                hbp = xlog_get_bp(log, 1);
                h_size = XLOG_BIG_RECORD_BSIZE;
@@ -3984,8 +3939,7 @@ xlog_recover_finish(
                 * case the unlink transactions would have problems
                 * pushing the EFIs out of the way.
                 */
-               xfs_log_force(log->l_mp, (xfs_lsn_t)0,
-                             (XFS_LOG_FORCE | XFS_LOG_SYNC));
+               xfs_log_force(log->l_mp, XFS_LOG_SYNC);
 
                xlog_recover_process_iunlinks(log);
 
@@ -4018,10 +3972,6 @@ xlog_recover_check_summary(
        xfs_agf_t       *agfp;
        xfs_buf_t       *agfbp;
        xfs_buf_t       *agibp;
-       xfs_buf_t       *sbbp;
-#ifdef XFS_LOUD_RECOVERY
-       xfs_sb_t        *sbp;
-#endif
        xfs_agnumber_t  agno;
        __uint64_t      freeblks;
        __uint64_t      itotal;
@@ -4056,30 +4006,5 @@ xlog_recover_check_summary(
                        xfs_buf_relse(agibp);
                }
        }
-
-       sbbp = xfs_getsb(mp, 0);
-#ifdef XFS_LOUD_RECOVERY
-       sbp = &mp->m_sb;
-       xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp));
-       cmn_err(CE_NOTE,
-               "xlog_recover_check_summary: sb_icount %Lu itotal %Lu",
-               sbp->sb_icount, itotal);
-       cmn_err(CE_NOTE,
-               "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu",
-               sbp->sb_ifree, ifree);
-       cmn_err(CE_NOTE,
-               "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu",
-               sbp->sb_fdblocks, freeblks);
-#if 0
-       /*
-        * This is turned off until I account for the allocation
-        * btree blocks which live in free space.
-        */
-       ASSERT(sbp->sb_icount == itotal);
-       ASSERT(sbp->sb_ifree == ifree);
-       ASSERT(sbp->sb_fdblocks == freeblks);
-#endif
-#endif
-       xfs_buf_relse(sbbp);
 }
 #endif /* DEBUG */