X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_log_recover.c;h=0de08e366315033765870b758073d8794fb1b1e9;hb=f13771187b9423b824f32518319f6da85d819003;hp=c37521467fdc94926dad7e470253b7f9d0311f11;hpb=34a622b2e1c8e11c8990184634f101c1aad42fec;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index c375214..0de08e3 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -36,7 +36,6 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_imap.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_log_priv.h" @@ -47,57 +46,109 @@ #include "xfs_quota.h" #include "xfs_rw.h" #include "xfs_utils.h" +#include "xfs_trace.h" STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); -STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, - xlog_recover_item_t *item); #if defined(DEBUG) STATIC void xlog_recover_check_summary(xlog_t *); -STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int); #else #define xlog_recover_check_summary(log) -#define xlog_recover_check_ail(mp, lip, gen) #endif - /* * Sector aligned buffer routines for buffer create/read/write/access */ -#define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ - ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ - ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) -#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) +/* + * Verify the given count of basic blocks is valid number of blocks + * to specify for an operation involving the given XFS log buffer. + * Returns nonzero if the count is valid, 0 otherwise. + */ -xfs_buf_t * -xlog_get_bp( +static inline int +xlog_buf_bbcount_valid( xlog_t *log, - int num_bblks) + int bbcount) { - ASSERT(num_bblks > 0); + return bbcount > 0 && bbcount <= log->l_logBBsize; +} - if (log->l_sectbb_log) { - if (num_bblks > 1) - num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); - num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); +/* + * Allocate a buffer to hold log data. The buffer needs to be able + * to map to a range of nbblks basic blocks at any valid (basic + * block) offset within the log. + */ +STATIC xfs_buf_t * +xlog_get_bp( + xlog_t *log, + int nbblks) +{ + if (!xlog_buf_bbcount_valid(log, nbblks)) { + xlog_warn("XFS: Invalid block length (0x%x) given for buffer", + nbblks); + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); + return NULL; } - return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp); + + /* + * We do log I/O in units of log sectors (a power-of-2 + * multiple of the basic block size), so we round up the + * requested size to acommodate the basic blocks required + * for complete log sectors. + * + * In addition, the buffer may be used for a non-sector- + * aligned block offset, in which case an I/O of the + * requested size could extend beyond the end of the + * buffer. If the requested size is only 1 basic block it + * will never straddle a sector boundary, so this won't be + * an issue. Nor will this be a problem if the log I/O is + * done in basic blocks (sector size 1). But otherwise we + * extend the buffer by one extra log sector to ensure + * there's space to accomodate this possiblility. + */ + if (nbblks > 1 && log->l_sectBBsize > 1) + nbblks += log->l_sectBBsize; + nbblks = round_up(nbblks, log->l_sectBBsize); + + return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); } -void +STATIC void xlog_put_bp( xfs_buf_t *bp) { xfs_buf_free(bp); } +/* + * Return the address of the start of the given block number's data + * in a log buffer. The buffer covers a log sector-aligned region. + */ +STATIC xfs_caddr_t +xlog_align( + xlog_t *log, + xfs_daddr_t blk_no, + int nbblks, + xfs_buf_t *bp) +{ + xfs_daddr_t offset; + xfs_caddr_t ptr; + + offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1); + ptr = XFS_BUF_PTR(bp) + BBTOB(offset); + + ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp)); + + return ptr; +} + /* * nbblks should be uint, but oh well. Just want to catch that 32-bit length. */ -int -xlog_bread( +STATIC int +xlog_bread_noalign( xlog_t *log, xfs_daddr_t blk_no, int nbblks, @@ -105,14 +156,18 @@ xlog_bread( { int error; - if (log->l_sectbb_log) { - blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); - nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); + if (!xlog_buf_bbcount_valid(log, nbblks)) { + xlog_warn("XFS: Invalid block length (0x%x) given for buffer", + nbblks); + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); + return EFSCORRUPTED; } + blk_no = round_down(blk_no, log->l_sectBBsize); + nbblks = round_up(nbblks, log->l_sectBBsize); + ASSERT(nbblks > 0); ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); - ASSERT(bp); XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); XFS_BUF_READ(bp); @@ -121,12 +176,31 @@ xlog_bread( XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); xfsbdstrat(log->l_mp, bp); - if ((error = xfs_iowait(bp))) + error = xfs_iowait(bp); + if (error) xfs_ioerror_alert("xlog_bread", log->l_mp, bp, XFS_BUF_ADDR(bp)); return error; } +STATIC int +xlog_bread( + xlog_t *log, + xfs_daddr_t blk_no, + int nbblks, + xfs_buf_t *bp, + xfs_caddr_t *offset) +{ + int error; + + error = xlog_bread_noalign(log, blk_no, nbblks, bp); + if (error) + return error; + + *offset = xlog_align(log, blk_no, nbblks, bp); + return 0; +} + /* * Write out the buffer at the given block for the given number of blocks. * The buffer is kept locked across the write and is returned locked. @@ -141,11 +215,16 @@ xlog_bwrite( { int error; - if (log->l_sectbb_log) { - blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); - nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); + if (!xlog_buf_bbcount_valid(log, nbblks)) { + xlog_warn("XFS: Invalid block length (0x%x) given for buffer", + nbblks); + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); + return EFSCORRUPTED; } + blk_no = round_down(blk_no, log->l_sectBBsize); + nbblks = round_up(nbblks, log->l_sectBBsize); + ASSERT(nbblks > 0); ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); @@ -163,24 +242,6 @@ xlog_bwrite( return error; } -STATIC xfs_caddr_t -xlog_align( - xlog_t *log, - xfs_daddr_t blk_no, - int nbblks, - xfs_buf_t *bp) -{ - xfs_caddr_t ptr; - - if (!log->l_sectbb_log) - return XFS_BUF_PTR(bp); - - ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); - ASSERT(XFS_BUF_SIZE(bp) >= - BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); - return ptr; -} - #ifdef DEBUG /* * dump debug superblock and log record information @@ -190,16 +251,10 @@ xlog_header_check_dump( xfs_mount_t *mp, xlog_rec_header_t *head) { - int b; - - cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); - for (b = 0; b < 16; b++) - cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); - cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); - cmn_err(CE_DEBUG, " log : uuid = "); - for (b = 0; b < 16; b++) - cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); - cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); + cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n", + __func__, &mp->m_sb.sb_uuid, XLOG_FMT); + cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n", + &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); } #else #define xlog_header_check_dump(mp, head) @@ -269,21 +324,16 @@ STATIC void xlog_recover_iodone( struct xfs_buf *bp) { - xfs_mount_t *mp; - - ASSERT(XFS_BUF_FSPRIVATE(bp, void *)); - if (XFS_BUF_GETERROR(bp)) { /* * We're not going to bother about retrying * this during recovery. One strike! */ - mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *); xfs_ioerror_alert("xlog_recover_iodone", - mp, bp, XFS_BUF_ADDR(bp)); - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + bp->b_mount, bp, XFS_BUF_ADDR(bp)); + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); } - XFS_BUF_SET_FSPRIVATE(bp, NULL); + bp->b_mount = NULL; XFS_BUF_CLR_IODONE_FUNC(bp); xfs_biodone(bp); } @@ -304,39 +354,38 @@ xlog_find_cycle_start( { xfs_caddr_t offset; xfs_daddr_t mid_blk; + xfs_daddr_t end_blk; uint mid_cycle; int error; - mid_blk = BLK_AVG(first_blk, *last_blk); - while (mid_blk != first_blk && mid_blk != *last_blk) { - if ((error = xlog_bread(log, mid_blk, 1, bp))) + end_blk = *last_blk; + mid_blk = BLK_AVG(first_blk, end_blk); + while (mid_blk != first_blk && mid_blk != end_blk) { + error = xlog_bread(log, mid_blk, 1, bp, &offset); + if (error) return error; - offset = xlog_align(log, mid_blk, 1, bp); mid_cycle = xlog_get_cycle(offset); - if (mid_cycle == cycle) { - *last_blk = mid_blk; - /* last_half_cycle == mid_cycle */ - } else { - first_blk = mid_blk; - /* first_half_cycle == mid_cycle */ - } - mid_blk = BLK_AVG(first_blk, *last_blk); + if (mid_cycle == cycle) + end_blk = mid_blk; /* last_half_cycle == mid_cycle */ + else + first_blk = mid_blk; /* first_half_cycle == mid_cycle */ + mid_blk = BLK_AVG(first_blk, end_blk); } - ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || - (mid_blk == *last_blk && mid_blk-1 == first_blk)); + ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) || + (mid_blk == end_blk && mid_blk-1 == first_blk)); + + *last_blk = end_blk; return 0; } /* - * Check that the range of blocks does not contain the cycle number - * given. The scan needs to occur from front to back and the ptr into the - * region must be updated since a later routine will need to perform another - * test. If the region is completely good, we end up returning the same - * last block number. - * - * Set blkno to -1 if we encounter no errors. This is an invalid block number - * since we don't ever expect logs to get this large. + * Check that a range of blocks does not contain stop_on_cycle_no. + * Fill in *new_blk with the block offset where such a block is + * found, or with -1 (an invalid block number) if there is no such + * block in the range. The scan needs to occur from front to back + * and the pointer into the region must be updated since a later + * routine will need to perform another test. */ STATIC int xlog_find_verify_cycle( @@ -353,12 +402,16 @@ xlog_find_verify_cycle( xfs_caddr_t buf = NULL; int error = 0; + /* + * Greedily allocate a buffer big enough to handle the full + * range of basic blocks we'll be examining. If that fails, + * try a smaller size. We need to be able to read at least + * a log sector, or we're out of luck. + */ bufblks = 1 << ffs(nbblks); - while (!(bp = xlog_get_bp(log, bufblks))) { - /* can't get enough memory to do everything in one big buffer */ bufblks >>= 1; - if (bufblks <= log->l_sectbb_log) + if (bufblks < log->l_sectBBsize) return ENOMEM; } @@ -367,10 +420,10 @@ xlog_find_verify_cycle( bcount = min(bufblks, (start_blk + nbblks - i)); - if ((error = xlog_bread(log, i, bcount, bp))) + error = xlog_bread(log, i, bcount, bp, &buf); + if (error) goto out; - buf = xlog_align(log, i, bcount, bp); for (j = 0; j < bcount; j++) { cycle = xlog_get_cycle(buf); if (cycle == stop_on_cycle_no) { @@ -424,9 +477,9 @@ xlog_find_verify_log_record( return ENOMEM; smallmem = 1; } else { - if ((error = xlog_bread(log, start_blk, num_blks, bp))) + error = xlog_bread(log, start_blk, num_blks, bp, &offset); + if (error) goto out; - offset = xlog_align(log, start_blk, num_blks, bp); offset += ((num_blks - 1) << BBSHIFT); } @@ -441,9 +494,9 @@ xlog_find_verify_log_record( } if (smallmem) { - if ((error = xlog_bread(log, i, 1, bp))) + error = xlog_bread(log, i, 1, bp, &offset); + if (error) goto out; - offset = xlog_align(log, i, 1, bp); } head = (xlog_rec_header_t *)offset; @@ -547,15 +600,18 @@ xlog_find_head( bp = xlog_get_bp(log, 1); if (!bp) return ENOMEM; - if ((error = xlog_bread(log, 0, 1, bp))) + + error = xlog_bread(log, 0, 1, bp, &offset); + if (error) goto bp_err; - offset = xlog_align(log, 0, 1, bp); + first_half_cycle = xlog_get_cycle(offset); last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ - if ((error = xlog_bread(log, last_blk, 1, bp))) + error = xlog_bread(log, last_blk, 1, bp, &offset); + if (error) goto bp_err; - offset = xlog_align(log, last_blk, 1, bp); + last_half_cycle = xlog_get_cycle(offset); ASSERT(last_half_cycle != 0); @@ -603,7 +659,7 @@ xlog_find_head( * In this case we want to find the first block with cycle * number matching last_half_cycle. We expect the log to be * some variation on - * x + 1 ... | x ... + * x + 1 ... | x ... | x * The first block with cycle number x (last_half_cycle) will * be where the new head belongs. First we do a binary search * for the first occurrence of last_half_cycle. The binary @@ -613,11 +669,13 @@ xlog_find_head( * the log, then we look for occurrences of last_half_cycle - 1 * at the end of the log. The cases we're looking for look * like - * x + 1 ... | x | x + 1 | x ... - * ^ binary search stopped here + * v binary search stopped here + * x + 1 ... | x | x + 1 | x ... | x + * ^ but we want to locate this spot * or - * x + 1 ... | x ... | x - 1 | x * <---------> less than scan distance + * x + 1 ... | x ... | x - 1 | x + * ^ we want to locate this spot */ stop_on_cycle = last_half_cycle; if ((error = xlog_find_cycle_start(log, bp, first_blk, @@ -673,16 +731,16 @@ xlog_find_head( * certainly not the head of the log. By searching for * last_half_cycle-1 we accomplish that. */ - start_blk = log_bbnum - num_scan_bblks + head_blk; ASSERT(head_blk <= INT_MAX && - (xfs_daddr_t) num_scan_bblks - head_blk >= 0); + (xfs_daddr_t) num_scan_bblks >= head_blk); + start_blk = log_bbnum - (num_scan_bblks - head_blk); if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks - (int)head_blk, (stop_on_cycle - 1), &new_blk))) goto bp_err; if (new_blk != -1) { head_blk = new_blk; - goto bad_blk; + goto validate_head; } /* @@ -700,7 +758,7 @@ xlog_find_head( head_blk = new_blk; } - bad_blk: +validate_head: /* * Now we need to make sure head_blk is not pointing to a block in * the middle of a log record. @@ -722,7 +780,7 @@ xlog_find_head( if ((error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0)) == -1) { /* We hit the beginning of the log during our search */ - start_blk = log_bbnum - num_scan_bblks + head_blk; + start_blk = log_bbnum - (num_scan_bblks - head_blk); new_blk = log_bbnum; ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0); @@ -777,7 +835,7 @@ xlog_find_head( * We could speed up search by using current head_blk buffer, but it is not * available. */ -int +STATIC int xlog_find_tail( xlog_t *log, xfs_daddr_t *head_blk, @@ -805,13 +863,14 @@ xlog_find_tail( if (!bp) return ENOMEM; if (*head_blk == 0) { /* special case */ - if ((error = xlog_bread(log, 0, 1, bp))) - goto bread_err; - offset = xlog_align(log, 0, 1, bp); + error = xlog_bread(log, 0, 1, bp, &offset); + if (error) + goto done; + if (xlog_get_cycle(offset) == 0) { *tail_blk = 0; /* leave all other log inited values alone */ - goto exit; + goto done; } } @@ -820,9 +879,10 @@ xlog_find_tail( */ ASSERT(*head_blk < INT_MAX); for (i = (int)(*head_blk) - 1; i >= 0; i--) { - if ((error = xlog_bread(log, i, 1, bp))) - goto bread_err; - offset = xlog_align(log, i, 1, bp); + error = xlog_bread(log, i, 1, bp, &offset); + if (error) + goto done; + if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { found = 1; break; @@ -836,9 +896,10 @@ xlog_find_tail( */ if (!found) { for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { - if ((error = xlog_bread(log, i, 1, bp))) - goto bread_err; - offset = xlog_align(log, i, 1, bp); + error = xlog_bread(log, i, 1, bp, &offset); + if (error) + goto done; + if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { found = 2; @@ -910,10 +971,10 @@ xlog_find_tail( if (*head_blk == after_umount_blk && be32_to_cpu(rhead->h_num_logops) == 1) { umount_data_blk = (i + hblks) % log->l_logBBsize; - if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { - goto bread_err; - } - offset = xlog_align(log, umount_data_blk, 1, bp); + error = xlog_bread(log, umount_data_blk, 1, bp, &offset); + if (error) + goto done; + op_head = (xlog_op_header_t *)offset; if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { /* @@ -958,12 +1019,10 @@ xlog_find_tail( * But... if the -device- itself is readonly, just skip this. * We can't recover this device anyway, so it won't matter. */ - if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { + if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) error = xlog_clear_stale_blocks(log, tail_lsn); - } -bread_err: -exit: +done: xlog_put_bp(bp); if (error) @@ -1005,9 +1064,10 @@ xlog_find_zeroed( bp = xlog_get_bp(log, 1); if (!bp) return ENOMEM; - if ((error = xlog_bread(log, 0, 1, bp))) + error = xlog_bread(log, 0, 1, bp, &offset); + if (error) goto bp_err; - offset = xlog_align(log, 0, 1, bp); + first_cycle = xlog_get_cycle(offset); if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; @@ -1016,9 +1076,10 @@ xlog_find_zeroed( } /* check partially zeroed log */ - if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) + error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); + if (error) goto bp_err; - offset = xlog_align(log, log_bbnum-1, 1, bp); + last_cycle = xlog_get_cycle(offset); if (last_cycle != 0) { /* log completely written to */ xlog_put_bp(bp); @@ -1121,16 +1182,22 @@ xlog_write_log_records( xfs_caddr_t offset; xfs_buf_t *bp; int balign, ealign; - int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); + int sectbb = log->l_sectBBsize; int end_block = start_block + blocks; int bufblks; int error = 0; int i, j = 0; + /* + * Greedily allocate a buffer big enough to handle the full + * range of basic blocks to be written. If that fails, try + * a smaller size. We need to be able to write at least a + * log sector, or we're out of luck. + */ bufblks = 1 << ffs(blocks); while (!(bp = xlog_get_bp(log, bufblks))) { bufblks >>= 1; - if (bufblks <= log->l_sectbb_log) + if (bufblks < sectbb) return ENOMEM; } @@ -1138,12 +1205,12 @@ xlog_write_log_records( * the buffer in the starting sector not covered by the first * write below. */ - balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); + balign = round_down(start_block, sectbb); if (balign != start_block) { - if ((error = xlog_bread(log, start_block, 1, bp))) { - xlog_put_bp(bp); - return error; - } + error = xlog_bread_noalign(log, start_block, 1, bp); + if (error) + goto out_put_bp; + j = start_block - balign; } @@ -1157,14 +1224,22 @@ xlog_write_log_records( * the buffer in the final sector not covered by the write. * If this is the same sector as the above read, skip it. */ - ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block); + ealign = round_down(end_block, sectbb); if (j == 0 && (start_block + endcount > ealign)) { offset = XFS_BUF_PTR(bp); balign = BBTOB(ealign - start_block); - XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb)); - if ((error = xlog_bread(log, ealign, sectbb, bp))) + error = XFS_BUF_SET_PTR(bp, offset + balign, + BBTOB(sectbb)); + if (error) + break; + + error = xlog_bread_noalign(log, ealign, sectbb, bp); + if (error) + break; + + error = XFS_BUF_SET_PTR(bp, offset, bufblks); + if (error) break; - XFS_BUF_SET_PTR(bp, offset, bufblks); } offset = xlog_align(log, start_block, endcount, bp); @@ -1179,6 +1254,8 @@ xlog_write_log_records( start_block += endcount; j = 0; } + + out_put_bp: xlog_put_bp(bp); return error; } @@ -1324,40 +1401,50 @@ xlog_clear_stale_blocks( STATIC xlog_recover_t * xlog_recover_find_tid( - xlog_recover_t *q, + struct hlist_head *head, xlog_tid_t tid) { - xlog_recover_t *p = q; + xlog_recover_t *trans; + struct hlist_node *n; - while (p != NULL) { - if (p->r_log_tid == tid) - break; - p = p->r_next; + hlist_for_each_entry(trans, n, head, r_list) { + if (trans->r_log_tid == tid) + return trans; } - return p; + return NULL; } STATIC void -xlog_recover_put_hashq( - xlog_recover_t **q, - xlog_recover_t *trans) +xlog_recover_new_tid( + struct hlist_head *head, + xlog_tid_t tid, + xfs_lsn_t lsn) { - trans->r_next = *q; - *q = trans; + xlog_recover_t *trans; + + trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); + trans->r_log_tid = tid; + trans->r_lsn = lsn; + INIT_LIST_HEAD(&trans->r_itemq); + + INIT_HLIST_NODE(&trans->r_list); + hlist_add_head(&trans->r_list, head); } STATIC void xlog_recover_add_item( - xlog_recover_item_t **itemq) + struct list_head *head) { xlog_recover_item_t *item; item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); - xlog_recover_insert_item_backq(itemq, item); + INIT_LIST_HEAD(&item->ri_list); + list_add_tail(&item->ri_list, head); } STATIC int xlog_recover_add_to_cont_trans( + struct log *log, xlog_recover_t *trans, xfs_caddr_t dp, int len) @@ -1366,8 +1453,7 @@ xlog_recover_add_to_cont_trans( xfs_caddr_t ptr, old_ptr; int old_len; - item = trans->r_itemq; - if (item == NULL) { + if (list_empty(&trans->r_itemq)) { /* finish copying rest of trans header */ xlog_recover_add_item(&trans->r_itemq); ptr = (xfs_caddr_t) &trans->r_theader + @@ -1375,7 +1461,8 @@ xlog_recover_add_to_cont_trans( memcpy(ptr, dp, len); /* d, s, l */ return 0; } - item = item->ri_prev; + /* take the tail entry */ + item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; @@ -1384,6 +1471,7 @@ xlog_recover_add_to_cont_trans( memcpy(&ptr[old_len], dp, len); /* d, s, l */ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; + trace_xfs_log_recover_item_add_cont(log, trans, item, 0); return 0; } @@ -1402,6 +1490,7 @@ xlog_recover_add_to_cont_trans( */ STATIC int xlog_recover_add_to_trans( + struct log *log, xlog_recover_t *trans, xfs_caddr_t dp, int len) @@ -1412,9 +1501,14 @@ xlog_recover_add_to_trans( if (!len) return 0; - item = trans->r_itemq; - if (item == NULL) { - ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); + if (list_empty(&trans->r_itemq)) { + /* we need to catch log corruptions here */ + if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { + xlog_warn("XFS: xlog_recover_add_to_trans: " + "bad header magic number"); + ASSERT(0); + return XFS_ERROR(EIO); + } if (len == sizeof(xfs_trans_header_t)) xlog_recover_add_item(&trans->r_itemq); memcpy(&trans->r_theader, dp, len); /* d, s, l */ @@ -1425,117 +1519,67 @@ xlog_recover_add_to_trans( memcpy(ptr, dp, len); in_f = (xfs_inode_log_format_t *)ptr; - if (item->ri_prev->ri_total != 0 && - item->ri_prev->ri_total == item->ri_prev->ri_cnt) { + /* take the tail entry */ + item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); + if (item->ri_total != 0 && + item->ri_total == item->ri_cnt) { + /* tail item is in use, get a new one */ xlog_recover_add_item(&trans->r_itemq); + item = list_entry(trans->r_itemq.prev, + xlog_recover_item_t, ri_list); } - item = trans->r_itemq; - item = item->ri_prev; if (item->ri_total == 0) { /* first region to be added */ - item->ri_total = in_f->ilf_size; - ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM); - item->ri_buf = kmem_zalloc((item->ri_total * - sizeof(xfs_log_iovec_t)), KM_SLEEP); + if (in_f->ilf_size == 0 || + in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { + xlog_warn( + "XFS: bad number of regions (%d) in inode log format", + in_f->ilf_size); + ASSERT(0); + return XFS_ERROR(EIO); + } + + item->ri_total = in_f->ilf_size; + item->ri_buf = + kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), + KM_SLEEP); } ASSERT(item->ri_total > item->ri_cnt); /* Description region is ri_buf[0] */ item->ri_buf[item->ri_cnt].i_addr = ptr; item->ri_buf[item->ri_cnt].i_len = len; item->ri_cnt++; + trace_xfs_log_recover_item_add(log, trans, item, 0); return 0; } -STATIC void -xlog_recover_new_tid( - xlog_recover_t **q, - xlog_tid_t tid, - xfs_lsn_t lsn) -{ - xlog_recover_t *trans; - - trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); - trans->r_log_tid = tid; - trans->r_lsn = lsn; - xlog_recover_put_hashq(q, trans); -} - -STATIC int -xlog_recover_unlink_tid( - xlog_recover_t **q, - xlog_recover_t *trans) -{ - xlog_recover_t *tp; - int found = 0; - - ASSERT(trans != NULL); - if (trans == *q) { - *q = (*q)->r_next; - } else { - tp = *q; - while (tp) { - if (tp->r_next == trans) { - found = 1; - break; - } - tp = tp->r_next; - } - if (!found) { - xlog_warn( - "XFS: xlog_recover_unlink_tid: trans not found"); - ASSERT(0); - return XFS_ERROR(EIO); - } - tp->r_next = tp->r_next->r_next; - } - return 0; -} - -STATIC void -xlog_recover_insert_item_backq( - xlog_recover_item_t **q, - xlog_recover_item_t *item) -{ - if (*q == NULL) { - item->ri_prev = item->ri_next = item; - *q = item; - } else { - item->ri_next = *q; - item->ri_prev = (*q)->ri_prev; - (*q)->ri_prev = item; - item->ri_prev->ri_next = item; - } -} - -STATIC void -xlog_recover_insert_item_frontq( - xlog_recover_item_t **q, - xlog_recover_item_t *item) -{ - xlog_recover_insert_item_backq(q, item); - *q = item; -} - +/* + * Sort the log items in the transaction. Cancelled buffers need + * to be put first so they are processed before any items that might + * modify the buffers. If they are cancelled, then the modifications + * don't need to be replayed. + */ STATIC int xlog_recover_reorder_trans( - xlog_recover_t *trans) + struct log *log, + xlog_recover_t *trans, + int pass) { - xlog_recover_item_t *first_item, *itemq, *itemq_next; - xfs_buf_log_format_t *buf_f; - ushort flags = 0; + xlog_recover_item_t *item, *n; + LIST_HEAD(sort_list); + + list_splice_init(&trans->r_itemq, &sort_list); + list_for_each_entry_safe(item, n, &sort_list, ri_list) { + xfs_buf_log_format_t *buf_f; - first_item = itemq = trans->r_itemq; - trans->r_itemq = NULL; - do { - itemq_next = itemq->ri_next; - buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr; + buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; - switch (ITEM_TYPE(itemq)) { + switch (ITEM_TYPE(item)) { case XFS_LI_BUF: - flags = buf_f->blf_flags; - if (!(flags & XFS_BLI_CANCEL)) { - xlog_recover_insert_item_frontq(&trans->r_itemq, - itemq); + if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) { + trace_xfs_log_recover_item_reorder_head(log, + trans, item, pass); + list_move(&item->ri_list, &trans->r_itemq); break; } case XFS_LI_INODE: @@ -1543,7 +1587,9 @@ xlog_recover_reorder_trans( case XFS_LI_QUOTAOFF: case XFS_LI_EFD: case XFS_LI_EFI: - xlog_recover_insert_item_backq(&trans->r_itemq, itemq); + trace_xfs_log_recover_item_reorder_tail(log, + trans, item, pass); + list_move_tail(&item->ri_list, &trans->r_itemq); break; default: xlog_warn( @@ -1551,8 +1597,8 @@ xlog_recover_reorder_trans( ASSERT(0); return XFS_ERROR(EIO); } - itemq = itemq_next; - } while (first_item != itemq); + } + ASSERT(list_empty(&sort_list)); return 0; } @@ -1592,8 +1638,10 @@ xlog_recover_do_buffer_pass1( /* * If this isn't a cancel buffer item, then just return. */ - if (!(flags & XFS_BLI_CANCEL)) + if (!(flags & XFS_BLI_CANCEL)) { + trace_xfs_log_recover_buf_not_cancel(log, buf_f); return; + } /* * Insert an xfs_buf_cancel record into the hash table of @@ -1627,6 +1675,7 @@ xlog_recover_do_buffer_pass1( while (nextp != NULL) { if (nextp->bc_blkno == blkno && nextp->bc_len == len) { nextp->bc_refcount++; + trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); return; } prevp = nextp; @@ -1640,6 +1689,7 @@ xlog_recover_do_buffer_pass1( bcp->bc_refcount = 1; bcp->bc_next = NULL; prevp->bc_next = bcp; + trace_xfs_log_recover_buf_cancel_add(log, buf_f); } /* @@ -1710,8 +1760,7 @@ xlog_check_buffer_cancelled( } else { prevp->bc_next = bcp->bc_next; } - kmem_free(bcp, - sizeof(xfs_buf_cancel_t)); + kmem_free(bcp); } } return 1; @@ -1780,6 +1829,8 @@ xlog_recover_do_inode_buffer( unsigned int *data_map = NULL; unsigned int map_size = 0; + trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); + switch (buf_f->blf_type) { case XFS_LI_BUF: data_map = buf_f->blf_data_map; @@ -1875,6 +1926,7 @@ xlog_recover_do_inode_buffer( /*ARGSUSED*/ STATIC void xlog_recover_do_reg_buffer( + struct xfs_mount *mp, xlog_recover_item_t *item, xfs_buf_t *bp, xfs_buf_log_format_t *buf_f) @@ -1886,6 +1938,8 @@ xlog_recover_do_reg_buffer( unsigned int map_size = 0; int error; + trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); + switch (buf_f->blf_type) { case XFS_LI_BUF: data_map = buf_f->blf_data_map; @@ -1913,16 +1967,30 @@ xlog_recover_do_reg_buffer( error = 0; if (buf_f->blf_flags & (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { + if (item->ri_buf[i].i_addr == NULL) { + cmn_err(CE_ALERT, + "XFS: NULL dquot in %s.", __func__); + goto next; + } + if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { + cmn_err(CE_ALERT, + "XFS: dquot too small (%d) in %s.", + item->ri_buf[i].i_len, __func__); + goto next; + } error = xfs_qm_dqcheck((xfs_disk_dquot_t *) item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, "dquot_buf_recover"); + if (error) + goto next; } - if (!error) - memcpy(xfs_buf_offset(bp, - (uint)bit << XFS_BLI_SHIFT), /* dest */ - item->ri_buf[i].i_addr, /* source */ - nbits<ri_buf[i].i_addr, /* source */ + nbits<l_quotaoffs_flag & type) return; - xlog_recover_do_reg_buffer(item, bp, buf_f); + xlog_recover_do_reg_buffer(mp, item, bp, buf_f); } /* @@ -2130,6 +2200,7 @@ xlog_recover_do_buffer_trans( xfs_daddr_t blkno; int len; ushort flags; + uint buf_flags; buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; @@ -2150,9 +2221,11 @@ xlog_recover_do_buffer_trans( */ cancel = xlog_recover_do_buffer_pass2(log, buf_f); if (cancel) { + trace_xfs_log_recover_buf_cancel(log, buf_f); return 0; } } + trace_xfs_log_recover_buf_recover(log, buf_f); switch (buf_f->blf_type) { case XFS_LI_BUF: blkno = buf_f->blf_blkno; @@ -2170,12 +2243,11 @@ xlog_recover_do_buffer_trans( } mp = log->l_mp; - if (flags & XFS_BLI_INODE_BUF) { - bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len, - XFS_BUF_LOCK); - } else { - bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0); - } + buf_flags = XBF_LOCK; + if (!(flags & XFS_BLI_INODE_BUF)) + buf_flags |= XBF_MAPPED; + + bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); if (XFS_BUF_ISERROR(bp)) { xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, bp, blkno); @@ -2191,7 +2263,7 @@ xlog_recover_do_buffer_trans( (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); } else { - xlog_recover_do_reg_buffer(item, bp, buf_f); + xlog_recover_do_reg_buffer(mp, item, bp, buf_f); } if (error) return XFS_ERROR(error); @@ -2218,9 +2290,8 @@ xlog_recover_do_buffer_trans( XFS_BUF_STALE(bp); error = xfs_bwrite(mp, bp); } else { - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || - XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); - XFS_BUF_SET_FSPRIVATE(bp, mp); + ASSERT(bp->b_mount == NULL || bp->b_mount == mp); + bp->b_mount = mp; XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); } @@ -2237,7 +2308,6 @@ xlog_recover_do_inode_trans( xfs_inode_log_format_t *in_f; xfs_mount_t *mp; xfs_buf_t *bp; - xfs_imap_t imap; xfs_dinode_t *dip; xfs_ino_t ino; int len; @@ -2265,52 +2335,37 @@ xlog_recover_do_inode_trans( } ino = in_f->ilf_ino; mp = log->l_mp; - if (ITEM_TYPE(item) == XFS_LI_INODE) { - imap.im_blkno = (xfs_daddr_t)in_f->ilf_blkno; - imap.im_len = in_f->ilf_len; - imap.im_boffset = in_f->ilf_boffset; - } else { - /* - * It's an old inode format record. We don't know where - * its cluster is located on disk, and we can't allow - * xfs_imap() to figure it out because the inode btrees - * are not ready to be used. Therefore do not pass the - * XFS_IMAP_LOOKUP flag to xfs_imap(). This will give - * us only the single block in which the inode lives - * rather than its cluster, so we must make sure to - * invalidate the buffer when we write it out below. - */ - imap.im_blkno = 0; - xfs_imap(log->l_mp, NULL, ino, &imap, 0); - } /* * Inode buffers can be freed, look out for it, * and do not replay the inode. */ - if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) { + if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, + in_f->ilf_len, 0)) { error = 0; + trace_xfs_log_recover_inode_cancel(log, in_f); goto error; } + trace_xfs_log_recover_inode_recover(log, in_f); - bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len, - XFS_BUF_LOCK); + bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, + XBF_LOCK); if (XFS_BUF_ISERROR(bp)) { xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, - bp, imap.im_blkno); + bp, in_f->ilf_blkno); error = XFS_BUF_GETERROR(bp); xfs_buf_relse(bp); goto error; } error = 0; ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); - dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); + dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); /* * Make sure the place we're flushing out to really looks * like an inode! */ - if (unlikely(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC)) { + if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", @@ -2333,16 +2388,17 @@ xlog_recover_do_inode_trans( } /* Skip replay when the on disk inode is newer than the log one */ - if (dicp->di_flushiter < be16_to_cpu(dip->di_core.di_flushiter)) { + if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { /* * Deal with the wrap case, DI_MAX_FLUSH is less * than smaller numbers */ - if (be16_to_cpu(dip->di_core.di_flushiter) == DI_MAX_FLUSH && + if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { /* do nothing */ } else { xfs_buf_relse(bp); + trace_xfs_log_recover_inode_skip(log, in_f); error = 0; goto error; } @@ -2398,7 +2454,7 @@ xlog_recover_do_inode_trans( error = EFSCORRUPTED; goto error; } - if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) { + if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); @@ -2410,23 +2466,24 @@ xlog_recover_do_inode_trans( } /* The core is in in-core format */ - xfs_dinode_to_disk(&dip->di_core, - (xfs_icdinode_t *)item->ri_buf[1].i_addr); + xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr); /* the rest is in on-disk format */ - if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) { - memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t), - item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t), - item->ri_buf[1].i_len - sizeof(xfs_dinode_core_t)); + if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { + memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), + item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), + item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); } fields = in_f->ilf_fields; switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { case XFS_ILOG_DEV: - dip->di_u.di_dev = cpu_to_be32(in_f->ilf_u.ilfu_rdev); + xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); break; case XFS_ILOG_UUID: - dip->di_u.di_muuid = in_f->ilf_u.ilfu_uuid; + memcpy(XFS_DFORK_DPTR(dip), + &in_f->ilf_u.ilfu_uuid, + sizeof(uuid_t)); break; } @@ -2442,12 +2499,12 @@ xlog_recover_do_inode_trans( switch (fields & XFS_ILOG_DFORK) { case XFS_ILOG_DDATA: case XFS_ILOG_DEXT: - memcpy(&dip->di_u, src, len); + memcpy(XFS_DFORK_DPTR(dip), src, len); break; case XFS_ILOG_DBROOT: - xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, - &(dip->di_u.di_bmbt), + xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, + (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), XFS_DFORK_DSIZE(dip, mp)); break; @@ -2484,8 +2541,8 @@ xlog_recover_do_inode_trans( case XFS_ILOG_ABROOT: dest = XFS_DFORK_APTR(dip); - xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, - (xfs_bmdr_block_t*)dest, + xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, + len, (xfs_bmdr_block_t*)dest, XFS_DFORK_ASIZE(dip, mp)); break; @@ -2499,20 +2556,13 @@ xlog_recover_do_inode_trans( } write_inode_buffer: - if (ITEM_TYPE(item) == XFS_LI_INODE) { - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || - XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); - XFS_BUF_SET_FSPRIVATE(bp, mp); - XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); - xfs_bdwrite(mp, bp); - } else { - XFS_BUF_STALE(bp); - error = xfs_bwrite(mp, bp); - } - + ASSERT(bp->b_mount == NULL || bp->b_mount == mp); + bp->b_mount = mp; + XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); + xfs_bdwrite(mp, bp); error: if (need_free) - kmem_free(in_f, sizeof(*in_f)); + kmem_free(in_f); return XFS_ERROR(error); } @@ -2578,7 +2628,19 @@ xlog_recover_do_dquot_trans( return (0); recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; - ASSERT(recddq); + + if (item->ri_buf[1].i_addr == NULL) { + cmn_err(CE_ALERT, + "XFS: NULL dquot in %s.", __func__); + return XFS_ERROR(EIO); + } + if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { + cmn_err(CE_ALERT, + "XFS: dquot too small (%d) in %s.", + item->ri_buf[1].i_len, __func__); + return XFS_ERROR(EIO); + } + /* * This type of quotas was turned off, so ignore this record. */ @@ -2633,9 +2695,8 @@ xlog_recover_do_dquot_trans( memcpy(ddq, recddq, item->ri_buf[1].i_len); ASSERT(dq_f->qlf_size == 2); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || - XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); - XFS_BUF_SET_FSPRIVATE(bp, mp); + ASSERT(bp->b_mount == NULL || bp->b_mount == mp); + bp->b_mount = mp; XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); @@ -2677,11 +2738,11 @@ xlog_recover_do_efi_trans( efip->efi_next_extent = efi_formatp->efi_nextents; efip->efi_flags |= XFS_EFI_COMMITTED; - spin_lock(&mp->m_ail_lock); + spin_lock(&log->l_ailp->xa_lock); /* - * xfs_trans_update_ail() drops the AIL lock. + * xfs_trans_ail_update() drops the AIL lock. */ - xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn); + xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); return 0; } @@ -2700,12 +2761,12 @@ xlog_recover_do_efd_trans( xlog_recover_item_t *item, int pass) { - xfs_mount_t *mp; xfs_efd_log_format_t *efd_formatp; xfs_efi_log_item_t *efip = NULL; xfs_log_item_t *lip; - int gen; __uint64_t efi_id; + struct xfs_ail_cursor cur; + struct xfs_ail *ailp = log->l_ailp; if (pass == XLOG_RECOVER_PASS1) { return; @@ -2722,25 +2783,26 @@ xlog_recover_do_efd_trans( * Search for the efi with the id in the efd format structure * in the AIL. */ - mp = log->l_mp; - spin_lock(&mp->m_ail_lock); - lip = xfs_trans_first_ail(mp, &gen); + spin_lock(&ailp->xa_lock); + lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { efip = (xfs_efi_log_item_t *)lip; if (efip->efi_format.efi_id == efi_id) { /* - * xfs_trans_delete_ail() drops the + * xfs_trans_ail_delete() drops the * AIL lock. */ - xfs_trans_delete_ail(mp, lip); + xfs_trans_ail_delete(ailp, lip); xfs_efi_item_free(efip); - return; + spin_lock(&ailp->xa_lock); + break; } } - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); + lip = xfs_trans_ail_cursor_next(ailp, &cur); } - spin_unlock(&mp->m_ail_lock); + xfs_trans_ail_cursor_done(ailp, &cur); + spin_unlock(&ailp->xa_lock); } /* @@ -2756,53 +2818,49 @@ xlog_recover_do_trans( int pass) { int error = 0; - xlog_recover_item_t *item, *first_item; + xlog_recover_item_t *item; - if ((error = xlog_recover_reorder_trans(trans))) + error = xlog_recover_reorder_trans(log, trans, pass); + if (error) return error; - first_item = item = trans->r_itemq; - do { - /* - * we don't need to worry about the block number being - * truncated in > 1 TB buffers because in user-land, - * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so - * the blknos will get through the user-mode buffer - * cache properly. The only bad case is o32 kernels - * where xfs_daddr_t is 32-bits but mount will warn us - * off a > 1 TB filesystem before we get here. - */ - if ((ITEM_TYPE(item) == XFS_LI_BUF)) { - if ((error = xlog_recover_do_buffer_trans(log, item, - pass))) - break; - } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) { - if ((error = xlog_recover_do_inode_trans(log, item, - pass))) - break; - } else if (ITEM_TYPE(item) == XFS_LI_EFI) { - if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn, - pass))) - break; - } else if (ITEM_TYPE(item) == XFS_LI_EFD) { + + list_for_each_entry(item, &trans->r_itemq, ri_list) { + trace_xfs_log_recover_item_recover(log, trans, item, pass); + switch (ITEM_TYPE(item)) { + case XFS_LI_BUF: + error = xlog_recover_do_buffer_trans(log, item, pass); + break; + case XFS_LI_INODE: + error = xlog_recover_do_inode_trans(log, item, pass); + break; + case XFS_LI_EFI: + error = xlog_recover_do_efi_trans(log, item, + trans->r_lsn, pass); + break; + case XFS_LI_EFD: xlog_recover_do_efd_trans(log, item, pass); - } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) { - if ((error = xlog_recover_do_dquot_trans(log, item, - pass))) - break; - } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) { - if ((error = xlog_recover_do_quotaoff_trans(log, item, - pass))) - break; - } else { - xlog_warn("XFS: xlog_recover_do_trans"); + error = 0; + break; + case XFS_LI_DQUOT: + error = xlog_recover_do_dquot_trans(log, item, pass); + break; + case XFS_LI_QUOTAOFF: + error = xlog_recover_do_quotaoff_trans(log, item, + pass); + break; + default: + xlog_warn( + "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item)); ASSERT(0); error = XFS_ERROR(EIO); break; } - item = item->ri_next; - } while (first_item != item); - return error; + if (error) + return error; + } + + return 0; } /* @@ -2814,38 +2872,31 @@ STATIC void xlog_recover_free_trans( xlog_recover_t *trans) { - xlog_recover_item_t *first_item, *item, *free_item; + xlog_recover_item_t *item, *n; int i; - item = first_item = trans->r_itemq; - do { - free_item = item; - item = item->ri_next; - /* Free the regions in the item. */ - for (i = 0; i < free_item->ri_cnt; i++) { - kmem_free(free_item->ri_buf[i].i_addr, - free_item->ri_buf[i].i_len); - } + list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { + /* Free the regions in the item. */ + list_del(&item->ri_list); + for (i = 0; i < item->ri_cnt; i++) + kmem_free(item->ri_buf[i].i_addr); /* Free the item itself */ - kmem_free(free_item->ri_buf, - (free_item->ri_total * sizeof(xfs_log_iovec_t))); - kmem_free(free_item, sizeof(xlog_recover_item_t)); - } while (first_item != item); + kmem_free(item->ri_buf); + kmem_free(item); + } /* Free the transaction recover structure */ - kmem_free(trans, sizeof(xlog_recover_t)); + kmem_free(trans); } STATIC int xlog_recover_commit_trans( xlog_t *log, - xlog_recover_t **q, xlog_recover_t *trans, int pass) { int error; - if ((error = xlog_recover_unlink_tid(q, trans))) - return error; + hlist_del(&trans->r_list); if ((error = xlog_recover_do_trans(log, trans, pass))) return error; xlog_recover_free_trans(trans); /* no error */ @@ -2873,7 +2924,7 @@ xlog_recover_unmount_trans( STATIC int xlog_recover_process_data( xlog_t *log, - xlog_recover_t *rhash[], + struct hlist_head rhash[], xlog_rec_header_t *rhead, xfs_caddr_t dp, int pass) @@ -2907,7 +2958,7 @@ xlog_recover_process_data( } tid = be32_to_cpu(ohead->oh_tid); hash = XLOG_RHASH(tid); - trans = xlog_recover_find_tid(rhash[hash], tid); + trans = xlog_recover_find_tid(&rhash[hash], tid); if (trans == NULL) { /* not found; add new tid */ if (ohead->oh_flags & XLOG_START_TRANS) xlog_recover_new_tid(&rhash[hash], tid, @@ -2925,14 +2976,15 @@ xlog_recover_process_data( switch (flags) { case XLOG_COMMIT_TRANS: error = xlog_recover_commit_trans(log, - &rhash[hash], trans, pass); + trans, pass); break; case XLOG_UNMOUNT_TRANS: error = xlog_recover_unmount_trans(trans); break; case XLOG_WAS_CONT_TRANS: - error = xlog_recover_add_to_cont_trans(trans, - dp, be32_to_cpu(ohead->oh_len)); + error = xlog_recover_add_to_cont_trans(log, + trans, dp, + be32_to_cpu(ohead->oh_len)); break; case XLOG_START_TRANS: xlog_warn( @@ -2942,7 +2994,7 @@ xlog_recover_process_data( break; case 0: case XLOG_CONTINUE_TRANS: - error = xlog_recover_add_to_trans(trans, + error = xlog_recover_add_to_trans(log, trans, dp, be32_to_cpu(ohead->oh_len)); break; default: @@ -2965,7 +3017,7 @@ xlog_recover_process_data( * Process an extent free intent item that was recovered from * the log. We need to free the extents that it describes. */ -STATIC void +STATIC int xlog_recover_process_efi( xfs_mount_t *mp, xfs_efi_log_item_t *efip) @@ -2973,6 +3025,7 @@ xlog_recover_process_efi( xfs_efd_log_item_t *efdp; xfs_trans_t *tp; int i; + int error = 0; xfs_extent_t *extp; xfs_fsblock_t startblock_fsb; @@ -2996,51 +3049,33 @@ xlog_recover_process_efi( * free the memory associated with it. */ xfs_efi_release(efip, efip->efi_format.efi_nextents); - return; + return XFS_ERROR(EIO); } } tp = xfs_trans_alloc(mp, 0); - xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); + error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); + if (error) + goto abort_error; efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); for (i = 0; i < efip->efi_format.efi_nextents; i++) { extp = &(efip->efi_format.efi_extents[i]); - xfs_free_extent(tp, extp->ext_start, extp->ext_len); + error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); + if (error) + goto abort_error; xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, extp->ext_len); } efip->efi_flags |= XFS_EFI_RECOVERED; - xfs_trans_commit(tp, 0); -} - -/* - * Verify that once we've encountered something other than an EFI - * in the AIL that there are no more EFIs in the AIL. - */ -#if defined(DEBUG) -STATIC void -xlog_recover_check_ail( - xfs_mount_t *mp, - xfs_log_item_t *lip, - int gen) -{ - int orig_gen = gen; + error = xfs_trans_commit(tp, 0); + return error; - do { - ASSERT(lip->li_type != XFS_LI_EFI); - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); - /* - * The check will be bogus if we restart from the - * beginning of the AIL, so ASSERT that we don't. - * We never should since we're holding the AIL lock - * the entire time. - */ - ASSERT(gen == orig_gen); - } while (lip != NULL); +abort_error: + xfs_trans_cancel(tp, XFS_TRANS_ABORT); + return error; } -#endif /* DEBUG */ /* * When this is called, all of the EFIs which did not have @@ -3060,25 +3095,29 @@ xlog_recover_check_ail( * everything already in the AIL, we stop processing as soon as * we see something other than an EFI in the AIL. */ -STATIC void +STATIC int xlog_recover_process_efis( xlog_t *log) { xfs_log_item_t *lip; xfs_efi_log_item_t *efip; - int gen; - xfs_mount_t *mp; - - mp = log->l_mp; - spin_lock(&mp->m_ail_lock); + int error = 0; + struct xfs_ail_cursor cur; + struct xfs_ail *ailp; - lip = xfs_trans_first_ail(mp, &gen); + ailp = log->l_ailp; + spin_lock(&ailp->xa_lock); + lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { /* * We're done when we see something other than an EFI. + * There should be no EFIs left in the AIL now. */ if (lip->li_type != XFS_LI_EFI) { - xlog_recover_check_ail(mp, lip, gen); +#ifdef DEBUG + for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) + ASSERT(lip->li_type != XFS_LI_EFI); +#endif break; } @@ -3087,16 +3126,21 @@ xlog_recover_process_efis( */ efip = (xfs_efi_log_item_t *)lip; if (efip->efi_flags & XFS_EFI_RECOVERED) { - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); + lip = xfs_trans_ail_cursor_next(ailp, &cur); continue; } - spin_unlock(&mp->m_ail_lock); - xlog_recover_process_efi(mp, efip); - spin_lock(&mp->m_ail_lock); - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); + spin_unlock(&ailp->xa_lock); + error = xlog_recover_process_efi(log->l_mp, efip); + spin_lock(&ailp->xa_lock); + if (error) + goto out; + lip = xfs_trans_ail_cursor_next(ailp, &cur); } - spin_unlock(&mp->m_ail_lock); +out: + xfs_trans_ail_cursor_done(ailp, &cur); + spin_unlock(&ailp->xa_lock); + return error; } /* @@ -3116,29 +3160,89 @@ xlog_recover_clear_agi_bucket( int error; tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); - xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); + error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), + 0, 0, 0); + if (error) + goto out_abort; - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, &agibp); - if (error) { - xfs_trans_cancel(tp, XFS_TRANS_ABORT); - return; - } + error = xfs_read_agi(mp, tp, agno, &agibp); + if (error) + goto out_abort; agi = XFS_BUF_TO_AGI(agibp); - if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) { - xfs_trans_cancel(tp, XFS_TRANS_ABORT); - return; - } - agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); - (void) xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp, 0); + if (error) + goto out_error; + return; + +out_abort: + xfs_trans_cancel(tp, XFS_TRANS_ABORT); +out_error: + xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " + "failed to clear agi %d. Continuing.", agno); + return; +} + +STATIC xfs_agino_t +xlog_recover_process_one_iunlink( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agino_t agino, + int bucket) +{ + struct xfs_buf *ibp; + struct xfs_dinode *dip; + struct xfs_inode *ip; + xfs_ino_t ino; + int error; + + ino = XFS_AGINO_TO_INO(mp, agno, agino); + error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); + if (error) + goto fail; + + /* + * Get the on disk inode to find the next inode in the bucket. + */ + error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK); + if (error) + goto fail_iput; + + ASSERT(ip->i_d.di_nlink == 0); + ASSERT(ip->i_d.di_mode != 0); + + /* setup for the next pass */ + agino = be32_to_cpu(dip->di_next_unlinked); + xfs_buf_relse(ibp); + + /* + * Prevent any DMAPI event from being sent when the reference on + * the inode is dropped. + */ + ip->i_d.di_dmevmask = 0; + + IRELE(ip); + return agino; + + fail_iput: + IRELE(ip); + fail: + /* + * We can't read in the inode this bucket points to, or this inode + * is messed up. Just ditch this bucket of inodes. We will lose + * some inodes and space, but at least we won't hang. + * + * Call xlog_recover_clear_agi_bucket() to perform a transaction to + * clear the inode pointer in the bucket. + */ + xlog_recover_clear_agi_bucket(mp, agno, bucket); + return NULLAGINO; } /* @@ -3153,7 +3257,7 @@ xlog_recover_clear_agi_bucket( * freeing of the inode and its removal from the list must be * atomic. */ -void +STATIC void xlog_recover_process_iunlinks( xlog_t *log) { @@ -3161,11 +3265,7 @@ xlog_recover_process_iunlinks( xfs_agnumber_t agno; xfs_agi_t *agi; xfs_buf_t *agibp; - xfs_buf_t *ibp; - xfs_dinode_t *dip; - xfs_inode_t *ip; xfs_agino_t agino; - xfs_ino_t ino; int bucket; int error; uint mp_dmevmask; @@ -3182,22 +3282,21 @@ xlog_recover_process_iunlinks( /* * Find the agi for this ag. */ - agibp = xfs_buf_read(mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agibp)) { - xfs_ioerror_alert("xlog_recover_process_iunlinks(#1)", - log->l_mp, agibp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp))); + error = xfs_read_agi(mp, NULL, agno, &agibp); + if (error) { + /* + * AGI is b0rked. Don't process it. + * + * We should probably mark the filesystem as corrupt + * after we've recovered all the ag's we can.... + */ + continue; } agi = XFS_BUF_TO_AGI(agibp); - ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agi->agi_magicnum)); for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { - agino = be32_to_cpu(agi->agi_unlinked[bucket]); while (agino != NULLAGINO) { - /* * Release the agi buffer so that it can * be acquired in the normal course of the @@ -3205,87 +3304,17 @@ xlog_recover_process_iunlinks( */ xfs_buf_relse(agibp); - ino = XFS_AGINO_TO_INO(mp, agno, agino); - error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); - ASSERT(error || (ip != NULL)); - - if (!error) { - /* - * Get the on disk inode to find the - * next inode in the bucket. - */ - error = xfs_itobp(mp, NULL, ip, &dip, - &ibp, 0, 0, - XFS_BUF_LOCK); - ASSERT(error || (dip != NULL)); - } - - if (!error) { - ASSERT(ip->i_d.di_nlink == 0); - - /* setup for the next pass */ - agino = be32_to_cpu( - dip->di_next_unlinked); - xfs_buf_relse(ibp); - /* - * Prevent any DMAPI event from - * being sent when the - * reference on the inode is - * dropped. - */ - ip->i_d.di_dmevmask = 0; - - /* - * If this is a new inode, handle - * it specially. Otherwise, - * just drop our reference to the - * inode. If there are no - * other references, this will - * send the inode to - * xfs_inactive() which will - * truncate the file and free - * the inode. - */ - if (ip->i_d.di_mode == 0) - xfs_iput_new(ip, 0); - else - IRELE(ip); - } else { - /* - * We can't read in the inode - * this bucket points to, or - * this inode is messed up. Just - * ditch this bucket of inodes. We - * will lose some inodes and space, - * but at least we won't hang. Call - * xlog_recover_clear_agi_bucket() - * to perform a transaction to clear - * the inode pointer in the bucket. - */ - xlog_recover_clear_agi_bucket(mp, agno, - bucket); - - agino = NULLAGINO; - } + agino = xlog_recover_process_one_iunlink(mp, + agno, agino, bucket); /* * Reacquire the agibuffer and continue around - * the loop. + * the loop. This should never fail as we know + * the buffer was good earlier on. */ - agibp = xfs_buf_read(mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, - XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agibp)) { - xfs_ioerror_alert( - "xlog_recover_process_iunlinks(#2)", - log->l_mp, agibp, - XFS_AG_DADDR(mp, agno, - XFS_AGI_DADDR(mp))); - } + error = xfs_read_agi(mp, NULL, agno, &agibp); + ASSERT(error == 0); agi = XFS_BUF_TO_AGI(agibp); - ASSERT(XFS_AGI_MAGIC == be32_to_cpu( - agi->agi_magicnum)); } } @@ -3336,7 +3365,6 @@ xlog_pack_data( int size = iclog->ic_offset + roundoff; __be32 cycle_lsn; xfs_caddr_t dp; - xlog_in_core_2_t *xhdr; xlog_pack_data_checksum(log, iclog, size); @@ -3351,7 +3379,8 @@ xlog_pack_data( } if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - xhdr = (xlog_in_core_2_t *)&iclog->ic_header; + xlog_in_core_2_t *xhdr = iclog->ic_data; + for ( ; i < BTOBB(size); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); @@ -3366,42 +3395,6 @@ xlog_pack_data( } } -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) -STATIC void -xlog_unpack_data_checksum( - xlog_rec_header_t *rhead, - xfs_caddr_t dp, - xlog_t *log) -{ - __be32 *up = (__be32 *)dp; - uint chksum = 0; - int i; - - /* divide length by 4 to get # words */ - for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) { - chksum ^= be32_to_cpu(*up); - up++; - } - if (chksum != be32_to_cpu(rhead->h_chksum)) { - if (rhead->h_chksum || - ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) { - cmn_err(CE_DEBUG, - "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n", - be32_to_cpu(rhead->h_chksum), chksum); - cmn_err(CE_DEBUG, -"XFS: Disregard message if filesystem was created with non-DEBUG kernel"); - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - cmn_err(CE_DEBUG, - "XFS: LogR this is a LogV2 filesystem\n"); - } - log->l_flags |= XLOG_CHKSUM_MISMATCH; - } - } -} -#else -#define xlog_unpack_data_checksum(rhead, dp, log) -#endif - STATIC void xlog_unpack_data( xlog_rec_header_t *rhead, @@ -3409,7 +3402,6 @@ xlog_unpack_data( xlog_t *log) { int i, j, k; - xlog_in_core_2_t *xhdr; for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { @@ -3418,7 +3410,7 @@ xlog_unpack_data( } if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - xhdr = (xlog_in_core_2_t *)rhead; + xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); @@ -3426,8 +3418,6 @@ xlog_unpack_data( dp += BBSIZE; } } - - xlog_unpack_data_checksum(rhead, dp, log); } STATIC int @@ -3483,12 +3473,12 @@ xlog_do_recovery_pass( { xlog_rec_header_t *rhead; xfs_daddr_t blk_no; - xfs_caddr_t bufaddr, offset; + xfs_caddr_t offset; xfs_buf_t *hbp, *dbp; int error = 0, h_size; int bblks, split_bblks; int hblks, split_hblks, wrapped_hblks; - xlog_recover_t *rhash[XLOG_RHASH_SIZE]; + struct hlist_head rhash[XLOG_RHASH_SIZE]; ASSERT(head_blk != tail_blk); @@ -3505,9 +3495,11 @@ xlog_do_recovery_pass( hbp = xlog_get_bp(log, 1); if (!hbp) return ENOMEM; - if ((error = xlog_bread(log, tail_blk, 1, hbp))) + + error = xlog_bread(log, tail_blk, 1, hbp, &offset); + if (error) goto bread_err1; - offset = xlog_align(log, tail_blk, 1, hbp); + rhead = (xlog_rec_header_t *)offset; error = xlog_valid_rec_header(log, rhead, tail_blk); if (error) @@ -3524,7 +3516,7 @@ xlog_do_recovery_pass( hblks = 1; } } else { - ASSERT(log->l_sectbb_log == 0); + ASSERT(log->l_sectBBsize == 1); hblks = 1; hbp = xlog_get_bp(log, 1); h_size = XLOG_BIG_RECORD_BSIZE; @@ -3541,9 +3533,10 @@ xlog_do_recovery_pass( memset(rhash, 0, sizeof(rhash)); if (tail_blk <= head_blk) { for (blk_no = tail_blk; blk_no < head_blk; ) { - if ((error = xlog_bread(log, blk_no, hblks, hbp))) + error = xlog_bread(log, blk_no, hblks, hbp, &offset); + if (error) goto bread_err2; - offset = xlog_align(log, blk_no, hblks, hbp); + rhead = (xlog_rec_header_t *)offset; error = xlog_valid_rec_header(log, rhead, blk_no); if (error) @@ -3551,10 +3544,11 @@ xlog_do_recovery_pass( /* blocks in data section */ bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); - error = xlog_bread(log, blk_no + hblks, bblks, dbp); + error = xlog_bread(log, blk_no + hblks, bblks, dbp, + &offset); if (error) goto bread_err2; - offset = xlog_align(log, blk_no + hblks, bblks, dbp); + xlog_unpack_data(rhead, offset, log); if ((error = xlog_recover_process_data(log, rhash, rhead, offset, pass))) @@ -3572,15 +3566,15 @@ xlog_do_recovery_pass( /* * Check for header wrapping around physical end-of-log */ - offset = NULL; + offset = XFS_BUF_PTR(hbp); split_hblks = 0; wrapped_hblks = 0; if (blk_no + hblks <= log->l_logBBsize) { /* Read header in one read */ - error = xlog_bread(log, blk_no, hblks, hbp); + error = xlog_bread(log, blk_no, hblks, hbp, + &offset); if (error) goto bread_err2; - offset = xlog_align(log, blk_no, hblks, hbp); } else { /* This LR is split across physical log end */ if (blk_no != log->l_logBBsize) { @@ -3588,12 +3582,13 @@ xlog_do_recovery_pass( ASSERT(blk_no <= INT_MAX); split_hblks = log->l_logBBsize - (int)blk_no; ASSERT(split_hblks > 0); - if ((error = xlog_bread(log, blk_no, - split_hblks, hbp))) + error = xlog_bread(log, blk_no, + split_hblks, hbp, + &offset); + if (error) goto bread_err2; - offset = xlog_align(log, blk_no, - split_hblks, hbp); } + /* * Note: this black magic still works with * large sector sizes (non-512) only because: @@ -3606,18 +3601,22 @@ xlog_do_recovery_pass( * _first_, then the log start (LR header end) * - order is important. */ - bufaddr = XFS_BUF_PTR(hbp); - XFS_BUF_SET_PTR(hbp, - bufaddr + BBTOB(split_hblks), - BBTOB(hblks - split_hblks)); wrapped_hblks = hblks - split_hblks; - error = xlog_bread(log, 0, wrapped_hblks, hbp); + error = XFS_BUF_SET_PTR(hbp, + offset + BBTOB(split_hblks), + BBTOB(hblks - split_hblks)); + if (error) + goto bread_err2; + + error = xlog_bread_noalign(log, 0, + wrapped_hblks, hbp); + if (error) + goto bread_err2; + + error = XFS_BUF_SET_PTR(hbp, offset, + BBTOB(hblks)); if (error) goto bread_err2; - XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks)); - if (!offset) - offset = xlog_align(log, 0, - wrapped_hblks, hbp); } rhead = (xlog_rec_header_t *)offset; error = xlog_valid_rec_header(log, rhead, @@ -3630,14 +3629,14 @@ xlog_do_recovery_pass( /* Read in data for log record */ if (blk_no + bblks <= log->l_logBBsize) { - error = xlog_bread(log, blk_no, bblks, dbp); + error = xlog_bread(log, blk_no, bblks, dbp, + &offset); if (error) goto bread_err2; - offset = xlog_align(log, blk_no, bblks, dbp); } else { /* This log record is split across the * physical end of log */ - offset = NULL; + offset = XFS_BUF_PTR(dbp); split_bblks = 0; if (blk_no != log->l_logBBsize) { /* some data is before the physical @@ -3647,12 +3646,13 @@ xlog_do_recovery_pass( split_bblks = log->l_logBBsize - (int)blk_no; ASSERT(split_bblks > 0); - if ((error = xlog_bread(log, blk_no, - split_bblks, dbp))) + error = xlog_bread(log, blk_no, + split_bblks, dbp, + &offset); + if (error) goto bread_err2; - offset = xlog_align(log, blk_no, - split_bblks, dbp); } + /* * Note: this black magic still works with * large sector sizes (non-512) only because: @@ -3665,17 +3665,21 @@ xlog_do_recovery_pass( * _first_, then the log start (LR header end) * - order is important. */ - bufaddr = XFS_BUF_PTR(dbp); - XFS_BUF_SET_PTR(dbp, - bufaddr + BBTOB(split_bblks), + error = XFS_BUF_SET_PTR(dbp, + offset + BBTOB(split_bblks), BBTOB(bblks - split_bblks)); - if ((error = xlog_bread(log, wrapped_hblks, - bblks - split_bblks, dbp))) + if (error) + goto bread_err2; + + error = xlog_bread_noalign(log, wrapped_hblks, + bblks - split_bblks, + dbp); + if (error) + goto bread_err2; + + error = XFS_BUF_SET_PTR(dbp, offset, h_size); + if (error) goto bread_err2; - XFS_BUF_SET_PTR(dbp, bufaddr, h_size); - if (!offset) - offset = xlog_align(log, wrapped_hblks, - bblks - split_bblks, dbp); } xlog_unpack_data(rhead, offset, log); if ((error = xlog_recover_process_data(log, rhash, @@ -3689,17 +3693,21 @@ xlog_do_recovery_pass( /* read first part of physical log */ while (blk_no < head_blk) { - if ((error = xlog_bread(log, blk_no, hblks, hbp))) + error = xlog_bread(log, blk_no, hblks, hbp, &offset); + if (error) goto bread_err2; - offset = xlog_align(log, blk_no, hblks, hbp); + rhead = (xlog_rec_header_t *)offset; error = xlog_valid_rec_header(log, rhead, blk_no); if (error) goto bread_err2; + bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); - if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) + error = xlog_bread(log, blk_no+hblks, bblks, dbp, + &offset); + if (error) goto bread_err2; - offset = xlog_align(log, blk_no+hblks, bblks, dbp); + xlog_unpack_data(rhead, offset, log); if ((error = xlog_recover_process_data(log, rhash, rhead, offset, pass))) @@ -3749,8 +3757,7 @@ xlog_do_log_recovery( error = xlog_do_recovery_pass(log, head_blk, tail_blk, XLOG_RECOVER_PASS1); if (error != 0) { - kmem_free(log->l_buf_cancel_table, - XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*)); + kmem_free(log->l_buf_cancel_table); log->l_buf_cancel_table = NULL; return error; } @@ -3769,8 +3776,7 @@ xlog_do_log_recovery( } #endif /* DEBUG */ - kmem_free(log->l_buf_cancel_table, - XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*)); + kmem_free(log->l_buf_cancel_table); log->l_buf_cancel_table = NULL; return error; @@ -3828,7 +3834,8 @@ xlog_do_recover( XFS_BUF_READ(bp); XFS_BUF_UNASYNC(bp); xfsbdstrat(log->l_mp, bp); - if ((error = xfs_iowait(bp))) { + error = xfs_iowait(bp); + if (error) { xfs_ioerror_alert("xlog_do_recover", log->l_mp, bp, XFS_BUF_ADDR(bp)); ASSERT(0); @@ -3907,8 +3914,7 @@ xlog_recover( */ int xlog_recover_finish( - xlog_t *log, - int mfsi_flags) + xlog_t *log) { /* * Now we're ready to do the transactions needed for the @@ -3919,19 +3925,23 @@ xlog_recover_finish( * rather than accepting new requests. */ if (log->l_flags & XLOG_RECOVERY_NEEDED) { - xlog_recover_process_efis(log); + int error; + error = xlog_recover_process_efis(log); + if (error) { + cmn_err(CE_ALERT, + "Failed to recover EFIs on filesystem: %s", + log->l_mp->m_fsname); + return error; + } /* * Sync the log to get all the EFIs out of the AIL. * This isn't absolutely necessary, but it helps in * case the unlink transactions would have problems * pushing the EFIs out of the way. */ - xfs_log_force(log->l_mp, (xfs_lsn_t)0, - (XFS_LOG_FORCE | XFS_LOG_SYNC)); + xfs_log_force(log->l_mp, XFS_LOG_SYNC); - if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) { - xlog_recover_process_iunlinks(log); - } + xlog_recover_process_iunlinks(log); xlog_recover_check_summary(log); @@ -3960,19 +3970,13 @@ xlog_recover_check_summary( { xfs_mount_t *mp; xfs_agf_t *agfp; - xfs_agi_t *agip; xfs_buf_t *agfbp; xfs_buf_t *agibp; - xfs_daddr_t agfdaddr; - xfs_daddr_t agidaddr; - xfs_buf_t *sbbp; -#ifdef XFS_LOUD_RECOVERY - xfs_sb_t *sbp; -#endif xfs_agnumber_t agno; __uint64_t freeblks; __uint64_t itotal; __uint64_t ifree; + int error; mp = log->l_mp; @@ -3980,62 +3984,27 @@ xlog_recover_check_summary( itotal = 0LL; ifree = 0LL; for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { - agfdaddr = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)); - agfbp = xfs_buf_read(mp->m_ddev_targp, agfdaddr, - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agfbp)) { - xfs_ioerror_alert("xlog_recover_check_summary(agf)", - mp, agfbp, agfdaddr); - } - agfp = XFS_BUF_TO_AGF(agfbp); - ASSERT(XFS_AGF_MAGIC == be32_to_cpu(agfp->agf_magicnum)); - ASSERT(XFS_AGF_GOOD_VERSION(be32_to_cpu(agfp->agf_versionnum))); - ASSERT(be32_to_cpu(agfp->agf_seqno) == agno); - - freeblks += be32_to_cpu(agfp->agf_freeblks) + - be32_to_cpu(agfp->agf_flcount); - xfs_buf_relse(agfbp); - - agidaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); - agibp = xfs_buf_read(mp->m_ddev_targp, agidaddr, - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agibp)) { - xfs_ioerror_alert("xlog_recover_check_summary(agi)", - mp, agibp, agidaddr); + error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, + "xlog_recover_check_summary(agf)" + "agf read failed agno %d error %d", + agno, error); + } else { + agfp = XFS_BUF_TO_AGF(agfbp); + freeblks += be32_to_cpu(agfp->agf_freeblks) + + be32_to_cpu(agfp->agf_flcount); + xfs_buf_relse(agfbp); } - agip = XFS_BUF_TO_AGI(agibp); - ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agip->agi_magicnum)); - ASSERT(XFS_AGI_GOOD_VERSION(be32_to_cpu(agip->agi_versionnum))); - ASSERT(be32_to_cpu(agip->agi_seqno) == agno); - itotal += be32_to_cpu(agip->agi_count); - ifree += be32_to_cpu(agip->agi_freecount); - xfs_buf_relse(agibp); - } + error = xfs_read_agi(mp, NULL, agno, &agibp); + if (!error) { + struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); - sbbp = xfs_getsb(mp, 0); -#ifdef XFS_LOUD_RECOVERY - sbp = &mp->m_sb; - xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp)); - cmn_err(CE_NOTE, - "xlog_recover_check_summary: sb_icount %Lu itotal %Lu", - sbp->sb_icount, itotal); - cmn_err(CE_NOTE, - "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu", - sbp->sb_ifree, ifree); - cmn_err(CE_NOTE, - "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu", - sbp->sb_fdblocks, freeblks); -#if 0 - /* - * This is turned off until I account for the allocation - * btree blocks which live in free space. - */ - ASSERT(sbp->sb_icount == itotal); - ASSERT(sbp->sb_ifree == ifree); - ASSERT(sbp->sb_fdblocks == freeblks); -#endif -#endif - xfs_buf_relse(sbbp); + itotal += be32_to_cpu(agi->agi_count); + ifree += be32_to_cpu(agi->agi_freecount); + xfs_buf_relse(agibp); + } + } } #endif /* DEBUG */