#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
-#include "xfs_imap.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_log_priv.h"
#include "xfs_quota.h"
#include "xfs_rw.h"
#include "xfs_utils.h"
+#include "xfs_trace.h"
STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
-STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q,
- xlog_recover_item_t *item);
#if defined(DEBUG)
STATIC void xlog_recover_check_summary(xlog_t *);
-STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int);
#else
#define xlog_recover_check_summary(log)
-#define xlog_recover_check_ail(mp, lip, gen)
#endif
-
/*
* Sector aligned buffer routines for buffer create/read/write/access
*/
-#define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \
- ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \
- ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) )
-#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask)
+/*
+ * Verify the given count of basic blocks is valid number of blocks
+ * to specify for an operation involving the given XFS log buffer.
+ * Returns nonzero if the count is valid, 0 otherwise.
+ */
-xfs_buf_t *
-xlog_get_bp(
+static inline int
+xlog_buf_bbcount_valid(
xlog_t *log,
- int num_bblks)
+ int bbcount)
{
- ASSERT(num_bblks > 0);
+ return bbcount > 0 && bbcount <= log->l_logBBsize;
+}
- if (log->l_sectbb_log) {
- if (num_bblks > 1)
- num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
- num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks);
+/*
+ * Allocate a buffer to hold log data. The buffer needs to be able
+ * to map to a range of nbblks basic blocks at any valid (basic
+ * block) offset within the log.
+ */
+STATIC xfs_buf_t *
+xlog_get_bp(
+ xlog_t *log,
+ int nbblks)
+{
+ if (!xlog_buf_bbcount_valid(log, nbblks)) {
+ xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+ nbblks);
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
+ return NULL;
}
- return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp);
+
+ /*
+ * We do log I/O in units of log sectors (a power-of-2
+ * multiple of the basic block size), so we round up the
+ * requested size to acommodate the basic blocks required
+ * for complete log sectors.
+ *
+ * In addition, the buffer may be used for a non-sector-
+ * aligned block offset, in which case an I/O of the
+ * requested size could extend beyond the end of the
+ * buffer. If the requested size is only 1 basic block it
+ * will never straddle a sector boundary, so this won't be
+ * an issue. Nor will this be a problem if the log I/O is
+ * done in basic blocks (sector size 1). But otherwise we
+ * extend the buffer by one extra log sector to ensure
+ * there's space to accomodate this possiblility.
+ */
+ if (nbblks > 1 && log->l_sectBBsize > 1)
+ nbblks += log->l_sectBBsize;
+ nbblks = round_up(nbblks, log->l_sectBBsize);
+
+ return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
}
-void
+STATIC void
xlog_put_bp(
xfs_buf_t *bp)
{
xfs_buf_free(bp);
}
+/*
+ * Return the address of the start of the given block number's data
+ * in a log buffer. The buffer covers a log sector-aligned region.
+ */
+STATIC xfs_caddr_t
+xlog_align(
+ xlog_t *log,
+ xfs_daddr_t blk_no,
+ int nbblks,
+ xfs_buf_t *bp)
+{
+ xfs_daddr_t offset;
+ xfs_caddr_t ptr;
+
+ offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
+ ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
+
+ ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
+
+ return ptr;
+}
+
/*
* nbblks should be uint, but oh well. Just want to catch that 32-bit length.
*/
-int
-xlog_bread(
+STATIC int
+xlog_bread_noalign(
xlog_t *log,
xfs_daddr_t blk_no,
int nbblks,
{
int error;
- if (log->l_sectbb_log) {
- blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
- nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
+ if (!xlog_buf_bbcount_valid(log, nbblks)) {
+ xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+ nbblks);
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
+ return EFSCORRUPTED;
}
+ blk_no = round_down(blk_no, log->l_sectBBsize);
+ nbblks = round_up(nbblks, log->l_sectBBsize);
+
ASSERT(nbblks > 0);
ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
- ASSERT(bp);
XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
XFS_BUF_READ(bp);
XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
xfsbdstrat(log->l_mp, bp);
- if ((error = xfs_iowait(bp)))
+ error = xfs_iowait(bp);
+ if (error)
xfs_ioerror_alert("xlog_bread", log->l_mp,
bp, XFS_BUF_ADDR(bp));
return error;
}
+STATIC int
+xlog_bread(
+ xlog_t *log,
+ xfs_daddr_t blk_no,
+ int nbblks,
+ xfs_buf_t *bp,
+ xfs_caddr_t *offset)
+{
+ int error;
+
+ error = xlog_bread_noalign(log, blk_no, nbblks, bp);
+ if (error)
+ return error;
+
+ *offset = xlog_align(log, blk_no, nbblks, bp);
+ return 0;
+}
+
/*
* Write out the buffer at the given block for the given number of blocks.
* The buffer is kept locked across the write and is returned locked.
{
int error;
- if (log->l_sectbb_log) {
- blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
- nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
+ if (!xlog_buf_bbcount_valid(log, nbblks)) {
+ xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+ nbblks);
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
+ return EFSCORRUPTED;
}
+ blk_no = round_down(blk_no, log->l_sectBBsize);
+ nbblks = round_up(nbblks, log->l_sectBBsize);
+
ASSERT(nbblks > 0);
ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
return error;
}
-STATIC xfs_caddr_t
-xlog_align(
- xlog_t *log,
- xfs_daddr_t blk_no,
- int nbblks,
- xfs_buf_t *bp)
-{
- xfs_caddr_t ptr;
-
- if (!log->l_sectbb_log)
- return XFS_BUF_PTR(bp);
-
- ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
- ASSERT(XFS_BUF_SIZE(bp) >=
- BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
- return ptr;
-}
-
#ifdef DEBUG
/*
* dump debug superblock and log record information
xfs_mount_t *mp,
xlog_rec_header_t *head)
{
- int b;
-
- cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__);
- for (b = 0; b < 16; b++)
- cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]);
- cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
- cmn_err(CE_DEBUG, " log : uuid = ");
- for (b = 0; b < 16; b++)
- cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]);
- cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt));
+ cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n",
+ __func__, &mp->m_sb.sb_uuid, XLOG_FMT);
+ cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n",
+ &head->h_fs_uuid, be32_to_cpu(head->h_fmt));
}
#else
#define xlog_header_check_dump(mp, head)
xlog_recover_iodone(
struct xfs_buf *bp)
{
- xfs_mount_t *mp;
-
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *));
-
if (XFS_BUF_GETERROR(bp)) {
/*
* We're not going to bother about retrying
* this during recovery. One strike!
*/
- mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *);
xfs_ioerror_alert("xlog_recover_iodone",
- mp, bp, XFS_BUF_ADDR(bp));
- xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+ bp->b_mount, bp, XFS_BUF_ADDR(bp));
+ xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
}
- XFS_BUF_SET_FSPRIVATE(bp, NULL);
+ bp->b_mount = NULL;
XFS_BUF_CLR_IODONE_FUNC(bp);
xfs_biodone(bp);
}
{
xfs_caddr_t offset;
xfs_daddr_t mid_blk;
+ xfs_daddr_t end_blk;
uint mid_cycle;
int error;
- mid_blk = BLK_AVG(first_blk, *last_blk);
- while (mid_blk != first_blk && mid_blk != *last_blk) {
- if ((error = xlog_bread(log, mid_blk, 1, bp)))
+ end_blk = *last_blk;
+ mid_blk = BLK_AVG(first_blk, end_blk);
+ while (mid_blk != first_blk && mid_blk != end_blk) {
+ error = xlog_bread(log, mid_blk, 1, bp, &offset);
+ if (error)
return error;
- offset = xlog_align(log, mid_blk, 1, bp);
mid_cycle = xlog_get_cycle(offset);
- if (mid_cycle == cycle) {
- *last_blk = mid_blk;
- /* last_half_cycle == mid_cycle */
- } else {
- first_blk = mid_blk;
- /* first_half_cycle == mid_cycle */
- }
- mid_blk = BLK_AVG(first_blk, *last_blk);
+ if (mid_cycle == cycle)
+ end_blk = mid_blk; /* last_half_cycle == mid_cycle */
+ else
+ first_blk = mid_blk; /* first_half_cycle == mid_cycle */
+ mid_blk = BLK_AVG(first_blk, end_blk);
}
- ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
- (mid_blk == *last_blk && mid_blk-1 == first_blk));
+ ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) ||
+ (mid_blk == end_blk && mid_blk-1 == first_blk));
+
+ *last_blk = end_blk;
return 0;
}
/*
- * Check that the range of blocks does not contain the cycle number
- * given. The scan needs to occur from front to back and the ptr into the
- * region must be updated since a later routine will need to perform another
- * test. If the region is completely good, we end up returning the same
- * last block number.
- *
- * Set blkno to -1 if we encounter no errors. This is an invalid block number
- * since we don't ever expect logs to get this large.
+ * Check that a range of blocks does not contain stop_on_cycle_no.
+ * Fill in *new_blk with the block offset where such a block is
+ * found, or with -1 (an invalid block number) if there is no such
+ * block in the range. The scan needs to occur from front to back
+ * and the pointer into the region must be updated since a later
+ * routine will need to perform another test.
*/
STATIC int
xlog_find_verify_cycle(
xfs_caddr_t buf = NULL;
int error = 0;
+ /*
+ * Greedily allocate a buffer big enough to handle the full
+ * range of basic blocks we'll be examining. If that fails,
+ * try a smaller size. We need to be able to read at least
+ * a log sector, or we're out of luck.
+ */
bufblks = 1 << ffs(nbblks);
-
while (!(bp = xlog_get_bp(log, bufblks))) {
- /* can't get enough memory to do everything in one big buffer */
bufblks >>= 1;
- if (bufblks <= log->l_sectbb_log)
+ if (bufblks < log->l_sectBBsize)
return ENOMEM;
}
bcount = min(bufblks, (start_blk + nbblks - i));
- if ((error = xlog_bread(log, i, bcount, bp)))
+ error = xlog_bread(log, i, bcount, bp, &buf);
+ if (error)
goto out;
- buf = xlog_align(log, i, bcount, bp);
for (j = 0; j < bcount; j++) {
cycle = xlog_get_cycle(buf);
if (cycle == stop_on_cycle_no) {
return ENOMEM;
smallmem = 1;
} else {
- if ((error = xlog_bread(log, start_blk, num_blks, bp)))
+ error = xlog_bread(log, start_blk, num_blks, bp, &offset);
+ if (error)
goto out;
- offset = xlog_align(log, start_blk, num_blks, bp);
offset += ((num_blks - 1) << BBSHIFT);
}
}
if (smallmem) {
- if ((error = xlog_bread(log, i, 1, bp)))
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
goto out;
- offset = xlog_align(log, i, 1, bp);
}
head = (xlog_rec_header_t *)offset;
bp = xlog_get_bp(log, 1);
if (!bp)
return ENOMEM;
- if ((error = xlog_bread(log, 0, 1, bp)))
+
+ error = xlog_bread(log, 0, 1, bp, &offset);
+ if (error)
goto bp_err;
- offset = xlog_align(log, 0, 1, bp);
+
first_half_cycle = xlog_get_cycle(offset);
last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */
- if ((error = xlog_bread(log, last_blk, 1, bp)))
+ error = xlog_bread(log, last_blk, 1, bp, &offset);
+ if (error)
goto bp_err;
- offset = xlog_align(log, last_blk, 1, bp);
+
last_half_cycle = xlog_get_cycle(offset);
ASSERT(last_half_cycle != 0);
* In this case we want to find the first block with cycle
* number matching last_half_cycle. We expect the log to be
* some variation on
- * x + 1 ... | x ...
+ * x + 1 ... | x ... | x
* The first block with cycle number x (last_half_cycle) will
* be where the new head belongs. First we do a binary search
* for the first occurrence of last_half_cycle. The binary
* the log, then we look for occurrences of last_half_cycle - 1
* at the end of the log. The cases we're looking for look
* like
- * x + 1 ... | x | x + 1 | x ...
- * ^ binary search stopped here
+ * v binary search stopped here
+ * x + 1 ... | x | x + 1 | x ... | x
+ * ^ but we want to locate this spot
* or
- * x + 1 ... | x ... | x - 1 | x
* <---------> less than scan distance
+ * x + 1 ... | x ... | x - 1 | x
+ * ^ we want to locate this spot
*/
stop_on_cycle = last_half_cycle;
if ((error = xlog_find_cycle_start(log, bp, first_blk,
* certainly not the head of the log. By searching for
* last_half_cycle-1 we accomplish that.
*/
- start_blk = log_bbnum - num_scan_bblks + head_blk;
ASSERT(head_blk <= INT_MAX &&
- (xfs_daddr_t) num_scan_bblks - head_blk >= 0);
+ (xfs_daddr_t) num_scan_bblks >= head_blk);
+ start_blk = log_bbnum - (num_scan_bblks - head_blk);
if ((error = xlog_find_verify_cycle(log, start_blk,
num_scan_bblks - (int)head_blk,
(stop_on_cycle - 1), &new_blk)))
goto bp_err;
if (new_blk != -1) {
head_blk = new_blk;
- goto bad_blk;
+ goto validate_head;
}
/*
head_blk = new_blk;
}
- bad_blk:
+validate_head:
/*
* Now we need to make sure head_blk is not pointing to a block in
* the middle of a log record.
if ((error = xlog_find_verify_log_record(log, start_blk,
&head_blk, 0)) == -1) {
/* We hit the beginning of the log during our search */
- start_blk = log_bbnum - num_scan_bblks + head_blk;
+ start_blk = log_bbnum - (num_scan_bblks - head_blk);
new_blk = log_bbnum;
ASSERT(start_blk <= INT_MAX &&
(xfs_daddr_t) log_bbnum-start_blk >= 0);
* We could speed up search by using current head_blk buffer, but it is not
* available.
*/
-int
+STATIC int
xlog_find_tail(
xlog_t *log,
xfs_daddr_t *head_blk,
if (!bp)
return ENOMEM;
if (*head_blk == 0) { /* special case */
- if ((error = xlog_bread(log, 0, 1, bp)))
- goto bread_err;
- offset = xlog_align(log, 0, 1, bp);
+ error = xlog_bread(log, 0, 1, bp, &offset);
+ if (error)
+ goto done;
+
if (xlog_get_cycle(offset) == 0) {
*tail_blk = 0;
/* leave all other log inited values alone */
- goto exit;
+ goto done;
}
}
*/
ASSERT(*head_blk < INT_MAX);
for (i = (int)(*head_blk) - 1; i >= 0; i--) {
- if ((error = xlog_bread(log, i, 1, bp)))
- goto bread_err;
- offset = xlog_align(log, i, 1, bp);
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto done;
+
if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
found = 1;
break;
*/
if (!found) {
for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
- if ((error = xlog_bread(log, i, 1, bp)))
- goto bread_err;
- offset = xlog_align(log, i, 1, bp);
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto done;
+
if (XLOG_HEADER_MAGIC_NUM ==
be32_to_cpu(*(__be32 *)offset)) {
found = 2;
if (*head_blk == after_umount_blk &&
be32_to_cpu(rhead->h_num_logops) == 1) {
umount_data_blk = (i + hblks) % log->l_logBBsize;
- if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
- goto bread_err;
- }
- offset = xlog_align(log, umount_data_blk, 1, bp);
+ error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+ if (error)
+ goto done;
+
op_head = (xlog_op_header_t *)offset;
if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
/*
* But... if the -device- itself is readonly, just skip this.
* We can't recover this device anyway, so it won't matter.
*/
- if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
+ if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
error = xlog_clear_stale_blocks(log, tail_lsn);
- }
-bread_err:
-exit:
+done:
xlog_put_bp(bp);
if (error)
bp = xlog_get_bp(log, 1);
if (!bp)
return ENOMEM;
- if ((error = xlog_bread(log, 0, 1, bp)))
+ error = xlog_bread(log, 0, 1, bp, &offset);
+ if (error)
goto bp_err;
- offset = xlog_align(log, 0, 1, bp);
+
first_cycle = xlog_get_cycle(offset);
if (first_cycle == 0) { /* completely zeroed log */
*blk_no = 0;
}
/* check partially zeroed log */
- if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
+ error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
+ if (error)
goto bp_err;
- offset = xlog_align(log, log_bbnum-1, 1, bp);
+
last_cycle = xlog_get_cycle(offset);
if (last_cycle != 0) { /* log completely written to */
xlog_put_bp(bp);
xfs_caddr_t offset;
xfs_buf_t *bp;
int balign, ealign;
- int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
+ int sectbb = log->l_sectBBsize;
int end_block = start_block + blocks;
int bufblks;
int error = 0;
int i, j = 0;
+ /*
+ * Greedily allocate a buffer big enough to handle the full
+ * range of basic blocks to be written. If that fails, try
+ * a smaller size. We need to be able to write at least a
+ * log sector, or we're out of luck.
+ */
bufblks = 1 << ffs(blocks);
while (!(bp = xlog_get_bp(log, bufblks))) {
bufblks >>= 1;
- if (bufblks <= log->l_sectbb_log)
+ if (bufblks < sectbb)
return ENOMEM;
}
* the buffer in the starting sector not covered by the first
* write below.
*/
- balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block);
+ balign = round_down(start_block, sectbb);
if (balign != start_block) {
- if ((error = xlog_bread(log, start_block, 1, bp))) {
- xlog_put_bp(bp);
- return error;
- }
+ error = xlog_bread_noalign(log, start_block, 1, bp);
+ if (error)
+ goto out_put_bp;
+
j = start_block - balign;
}
* the buffer in the final sector not covered by the write.
* If this is the same sector as the above read, skip it.
*/
- ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block);
+ ealign = round_down(end_block, sectbb);
if (j == 0 && (start_block + endcount > ealign)) {
offset = XFS_BUF_PTR(bp);
balign = BBTOB(ealign - start_block);
- XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb));
- if ((error = xlog_bread(log, ealign, sectbb, bp)))
+ error = XFS_BUF_SET_PTR(bp, offset + balign,
+ BBTOB(sectbb));
+ if (error)
+ break;
+
+ error = xlog_bread_noalign(log, ealign, sectbb, bp);
+ if (error)
+ break;
+
+ error = XFS_BUF_SET_PTR(bp, offset, bufblks);
+ if (error)
break;
- XFS_BUF_SET_PTR(bp, offset, bufblks);
}
offset = xlog_align(log, start_block, endcount, bp);
start_block += endcount;
j = 0;
}
+
+ out_put_bp:
xlog_put_bp(bp);
return error;
}
STATIC xlog_recover_t *
xlog_recover_find_tid(
- xlog_recover_t *q,
+ struct hlist_head *head,
xlog_tid_t tid)
{
- xlog_recover_t *p = q;
+ xlog_recover_t *trans;
+ struct hlist_node *n;
- while (p != NULL) {
- if (p->r_log_tid == tid)
- break;
- p = p->r_next;
+ hlist_for_each_entry(trans, n, head, r_list) {
+ if (trans->r_log_tid == tid)
+ return trans;
}
- return p;
+ return NULL;
}
STATIC void
-xlog_recover_put_hashq(
- xlog_recover_t **q,
- xlog_recover_t *trans)
+xlog_recover_new_tid(
+ struct hlist_head *head,
+ xlog_tid_t tid,
+ xfs_lsn_t lsn)
{
- trans->r_next = *q;
- *q = trans;
+ xlog_recover_t *trans;
+
+ trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
+ trans->r_log_tid = tid;
+ trans->r_lsn = lsn;
+ INIT_LIST_HEAD(&trans->r_itemq);
+
+ INIT_HLIST_NODE(&trans->r_list);
+ hlist_add_head(&trans->r_list, head);
}
STATIC void
xlog_recover_add_item(
- xlog_recover_item_t **itemq)
+ struct list_head *head)
{
xlog_recover_item_t *item;
item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
- xlog_recover_insert_item_backq(itemq, item);
+ INIT_LIST_HEAD(&item->ri_list);
+ list_add_tail(&item->ri_list, head);
}
STATIC int
xlog_recover_add_to_cont_trans(
+ struct log *log,
xlog_recover_t *trans,
xfs_caddr_t dp,
int len)
xfs_caddr_t ptr, old_ptr;
int old_len;
- item = trans->r_itemq;
- if (item == NULL) {
+ if (list_empty(&trans->r_itemq)) {
/* finish copying rest of trans header */
xlog_recover_add_item(&trans->r_itemq);
ptr = (xfs_caddr_t) &trans->r_theader +
memcpy(ptr, dp, len); /* d, s, l */
return 0;
}
- item = item->ri_prev;
+ /* take the tail entry */
+ item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len;
memcpy(&ptr[old_len], dp, len); /* d, s, l */
item->ri_buf[item->ri_cnt-1].i_len += len;
item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+ trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
return 0;
}
*/
STATIC int
xlog_recover_add_to_trans(
+ struct log *log,
xlog_recover_t *trans,
xfs_caddr_t dp,
int len)
if (!len)
return 0;
- item = trans->r_itemq;
- if (item == NULL) {
- ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
+ if (list_empty(&trans->r_itemq)) {
+ /* we need to catch log corruptions here */
+ if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
+ xlog_warn("XFS: xlog_recover_add_to_trans: "
+ "bad header magic number");
+ ASSERT(0);
+ return XFS_ERROR(EIO);
+ }
if (len == sizeof(xfs_trans_header_t))
xlog_recover_add_item(&trans->r_itemq);
memcpy(&trans->r_theader, dp, len); /* d, s, l */
memcpy(ptr, dp, len);
in_f = (xfs_inode_log_format_t *)ptr;
- if (item->ri_prev->ri_total != 0 &&
- item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
+ /* take the tail entry */
+ item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+ if (item->ri_total != 0 &&
+ item->ri_total == item->ri_cnt) {
+ /* tail item is in use, get a new one */
xlog_recover_add_item(&trans->r_itemq);
+ item = list_entry(trans->r_itemq.prev,
+ xlog_recover_item_t, ri_list);
}
- item = trans->r_itemq;
- item = item->ri_prev;
if (item->ri_total == 0) { /* first region to be added */
- item->ri_total = in_f->ilf_size;
- ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
- item->ri_buf = kmem_zalloc((item->ri_total *
- sizeof(xfs_log_iovec_t)), KM_SLEEP);
+ if (in_f->ilf_size == 0 ||
+ in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
+ xlog_warn(
+ "XFS: bad number of regions (%d) in inode log format",
+ in_f->ilf_size);
+ ASSERT(0);
+ return XFS_ERROR(EIO);
+ }
+
+ item->ri_total = in_f->ilf_size;
+ item->ri_buf =
+ kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
+ KM_SLEEP);
}
ASSERT(item->ri_total > item->ri_cnt);
/* Description region is ri_buf[0] */
item->ri_buf[item->ri_cnt].i_addr = ptr;
item->ri_buf[item->ri_cnt].i_len = len;
item->ri_cnt++;
+ trace_xfs_log_recover_item_add(log, trans, item, 0);
return 0;
}
-STATIC void
-xlog_recover_new_tid(
- xlog_recover_t **q,
- xlog_tid_t tid,
- xfs_lsn_t lsn)
-{
- xlog_recover_t *trans;
-
- trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
- trans->r_log_tid = tid;
- trans->r_lsn = lsn;
- xlog_recover_put_hashq(q, trans);
-}
-
-STATIC int
-xlog_recover_unlink_tid(
- xlog_recover_t **q,
- xlog_recover_t *trans)
-{
- xlog_recover_t *tp;
- int found = 0;
-
- ASSERT(trans != NULL);
- if (trans == *q) {
- *q = (*q)->r_next;
- } else {
- tp = *q;
- while (tp) {
- if (tp->r_next == trans) {
- found = 1;
- break;
- }
- tp = tp->r_next;
- }
- if (!found) {
- xlog_warn(
- "XFS: xlog_recover_unlink_tid: trans not found");
- ASSERT(0);
- return XFS_ERROR(EIO);
- }
- tp->r_next = tp->r_next->r_next;
- }
- return 0;
-}
-
-STATIC void
-xlog_recover_insert_item_backq(
- xlog_recover_item_t **q,
- xlog_recover_item_t *item)
-{
- if (*q == NULL) {
- item->ri_prev = item->ri_next = item;
- *q = item;
- } else {
- item->ri_next = *q;
- item->ri_prev = (*q)->ri_prev;
- (*q)->ri_prev = item;
- item->ri_prev->ri_next = item;
- }
-}
-
-STATIC void
-xlog_recover_insert_item_frontq(
- xlog_recover_item_t **q,
- xlog_recover_item_t *item)
-{
- xlog_recover_insert_item_backq(q, item);
- *q = item;
-}
-
+/*
+ * Sort the log items in the transaction. Cancelled buffers need
+ * to be put first so they are processed before any items that might
+ * modify the buffers. If they are cancelled, then the modifications
+ * don't need to be replayed.
+ */
STATIC int
xlog_recover_reorder_trans(
- xlog_recover_t *trans)
+ struct log *log,
+ xlog_recover_t *trans,
+ int pass)
{
- xlog_recover_item_t *first_item, *itemq, *itemq_next;
- xfs_buf_log_format_t *buf_f;
- ushort flags = 0;
+ xlog_recover_item_t *item, *n;
+ LIST_HEAD(sort_list);
+
+ list_splice_init(&trans->r_itemq, &sort_list);
+ list_for_each_entry_safe(item, n, &sort_list, ri_list) {
+ xfs_buf_log_format_t *buf_f;
- first_item = itemq = trans->r_itemq;
- trans->r_itemq = NULL;
- do {
- itemq_next = itemq->ri_next;
- buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
+ buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
- switch (ITEM_TYPE(itemq)) {
+ switch (ITEM_TYPE(item)) {
case XFS_LI_BUF:
- flags = buf_f->blf_flags;
- if (!(flags & XFS_BLI_CANCEL)) {
- xlog_recover_insert_item_frontq(&trans->r_itemq,
- itemq);
+ if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
+ trace_xfs_log_recover_item_reorder_head(log,
+ trans, item, pass);
+ list_move(&item->ri_list, &trans->r_itemq);
break;
}
case XFS_LI_INODE:
case XFS_LI_QUOTAOFF:
case XFS_LI_EFD:
case XFS_LI_EFI:
- xlog_recover_insert_item_backq(&trans->r_itemq, itemq);
+ trace_xfs_log_recover_item_reorder_tail(log,
+ trans, item, pass);
+ list_move_tail(&item->ri_list, &trans->r_itemq);
break;
default:
xlog_warn(
ASSERT(0);
return XFS_ERROR(EIO);
}
- itemq = itemq_next;
- } while (first_item != itemq);
+ }
+ ASSERT(list_empty(&sort_list));
return 0;
}
/*
* If this isn't a cancel buffer item, then just return.
*/
- if (!(flags & XFS_BLI_CANCEL))
+ if (!(flags & XFS_BLI_CANCEL)) {
+ trace_xfs_log_recover_buf_not_cancel(log, buf_f);
return;
+ }
/*
* Insert an xfs_buf_cancel record into the hash table of
while (nextp != NULL) {
if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
nextp->bc_refcount++;
+ trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
return;
}
prevp = nextp;
bcp->bc_refcount = 1;
bcp->bc_next = NULL;
prevp->bc_next = bcp;
+ trace_xfs_log_recover_buf_cancel_add(log, buf_f);
}
/*
} else {
prevp->bc_next = bcp->bc_next;
}
- kmem_free(bcp,
- sizeof(xfs_buf_cancel_t));
+ kmem_free(bcp);
}
}
return 1;
unsigned int *data_map = NULL;
unsigned int map_size = 0;
+ trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
+
switch (buf_f->blf_type) {
case XFS_LI_BUF:
data_map = buf_f->blf_data_map;
/*ARGSUSED*/
STATIC void
xlog_recover_do_reg_buffer(
+ struct xfs_mount *mp,
xlog_recover_item_t *item,
xfs_buf_t *bp,
xfs_buf_log_format_t *buf_f)
unsigned int map_size = 0;
int error;
+ trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
+
switch (buf_f->blf_type) {
case XFS_LI_BUF:
data_map = buf_f->blf_data_map;
error = 0;
if (buf_f->blf_flags &
(XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
+ if (item->ri_buf[i].i_addr == NULL) {
+ cmn_err(CE_ALERT,
+ "XFS: NULL dquot in %s.", __func__);
+ goto next;
+ }
+ if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
+ cmn_err(CE_ALERT,
+ "XFS: dquot too small (%d) in %s.",
+ item->ri_buf[i].i_len, __func__);
+ goto next;
+ }
error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
item->ri_buf[i].i_addr,
-1, 0, XFS_QMOPT_DOWARN,
"dquot_buf_recover");
+ if (error)
+ goto next;
}
- if (!error)
- memcpy(xfs_buf_offset(bp,
- (uint)bit << XFS_BLI_SHIFT), /* dest */
- item->ri_buf[i].i_addr, /* source */
- nbits<<XFS_BLI_SHIFT); /* length */
+
+ memcpy(xfs_buf_offset(bp,
+ (uint)bit << XFS_BLI_SHIFT), /* dest */
+ item->ri_buf[i].i_addr, /* source */
+ nbits<<XFS_BLI_SHIFT); /* length */
+ next:
i++;
bit += nbits;
}
{
uint type;
+ trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
+
/*
* Filesystems are required to send in quota flags at mount time.
*/
if (log->l_quotaoffs_flag & type)
return;
- xlog_recover_do_reg_buffer(item, bp, buf_f);
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
}
/*
xfs_daddr_t blkno;
int len;
ushort flags;
+ uint buf_flags;
buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
*/
cancel = xlog_recover_do_buffer_pass2(log, buf_f);
if (cancel) {
+ trace_xfs_log_recover_buf_cancel(log, buf_f);
return 0;
}
}
+ trace_xfs_log_recover_buf_recover(log, buf_f);
switch (buf_f->blf_type) {
case XFS_LI_BUF:
blkno = buf_f->blf_blkno;
}
mp = log->l_mp;
- if (flags & XFS_BLI_INODE_BUF) {
- bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len,
- XFS_BUF_LOCK);
- } else {
- bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0);
- }
+ buf_flags = XBF_LOCK;
+ if (!(flags & XFS_BLI_INODE_BUF))
+ buf_flags |= XBF_MAPPED;
+
+ bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
if (XFS_BUF_ISERROR(bp)) {
xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
bp, blkno);
(XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
} else {
- xlog_recover_do_reg_buffer(item, bp, buf_f);
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
}
if (error)
return XFS_ERROR(error);
XFS_BUF_STALE(bp);
error = xfs_bwrite(mp, bp);
} else {
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
- XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
- XFS_BUF_SET_FSPRIVATE(bp, mp);
+ ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
+ bp->b_mount = mp;
XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
xfs_bdwrite(mp, bp);
}
xfs_inode_log_format_t *in_f;
xfs_mount_t *mp;
xfs_buf_t *bp;
- xfs_imap_t imap;
xfs_dinode_t *dip;
xfs_ino_t ino;
int len;
}
ino = in_f->ilf_ino;
mp = log->l_mp;
- if (ITEM_TYPE(item) == XFS_LI_INODE) {
- imap.im_blkno = (xfs_daddr_t)in_f->ilf_blkno;
- imap.im_len = in_f->ilf_len;
- imap.im_boffset = in_f->ilf_boffset;
- } else {
- /*
- * It's an old inode format record. We don't know where
- * its cluster is located on disk, and we can't allow
- * xfs_imap() to figure it out because the inode btrees
- * are not ready to be used. Therefore do not pass the
- * XFS_IMAP_LOOKUP flag to xfs_imap(). This will give
- * us only the single block in which the inode lives
- * rather than its cluster, so we must make sure to
- * invalidate the buffer when we write it out below.
- */
- imap.im_blkno = 0;
- xfs_imap(log->l_mp, NULL, ino, &imap, 0);
- }
/*
* Inode buffers can be freed, look out for it,
* and do not replay the inode.
*/
- if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) {
+ if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
+ in_f->ilf_len, 0)) {
error = 0;
+ trace_xfs_log_recover_inode_cancel(log, in_f);
goto error;
}
+ trace_xfs_log_recover_inode_recover(log, in_f);
- bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len,
- XFS_BUF_LOCK);
+ bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
+ XBF_LOCK);
if (XFS_BUF_ISERROR(bp)) {
xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
- bp, imap.im_blkno);
+ bp, in_f->ilf_blkno);
error = XFS_BUF_GETERROR(bp);
xfs_buf_relse(bp);
goto error;
}
error = 0;
ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
- dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
+ dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
/*
* Make sure the place we're flushing out to really looks
* like an inode!
*/
- if (unlikely(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC)) {
+ if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
}
/* Skip replay when the on disk inode is newer than the log one */
- if (dicp->di_flushiter < be16_to_cpu(dip->di_core.di_flushiter)) {
+ if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
/*
* Deal with the wrap case, DI_MAX_FLUSH is less
* than smaller numbers
*/
- if (be16_to_cpu(dip->di_core.di_flushiter) == DI_MAX_FLUSH &&
+ if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
/* do nothing */
} else {
xfs_buf_relse(bp);
+ trace_xfs_log_recover_inode_skip(log, in_f);
error = 0;
goto error;
}
error = EFSCORRUPTED;
goto error;
}
- if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) {
+ if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
}
/* The core is in in-core format */
- xfs_dinode_to_disk(&dip->di_core,
- (xfs_icdinode_t *)item->ri_buf[1].i_addr);
+ xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr);
/* the rest is in on-disk format */
- if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) {
- memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t),
- item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t),
- item->ri_buf[1].i_len - sizeof(xfs_dinode_core_t));
+ if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
+ memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode),
+ item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode),
+ item->ri_buf[1].i_len - sizeof(struct xfs_icdinode));
}
fields = in_f->ilf_fields;
switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
case XFS_ILOG_DEV:
- dip->di_u.di_dev = cpu_to_be32(in_f->ilf_u.ilfu_rdev);
+ xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
break;
case XFS_ILOG_UUID:
- dip->di_u.di_muuid = in_f->ilf_u.ilfu_uuid;
+ memcpy(XFS_DFORK_DPTR(dip),
+ &in_f->ilf_u.ilfu_uuid,
+ sizeof(uuid_t));
break;
}
switch (fields & XFS_ILOG_DFORK) {
case XFS_ILOG_DDATA:
case XFS_ILOG_DEXT:
- memcpy(&dip->di_u, src, len);
+ memcpy(XFS_DFORK_DPTR(dip), src, len);
break;
case XFS_ILOG_DBROOT:
- xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len,
- &(dip->di_u.di_bmbt),
+ xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
+ (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
XFS_DFORK_DSIZE(dip, mp));
break;
case XFS_ILOG_ABROOT:
dest = XFS_DFORK_APTR(dip);
- xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len,
- (xfs_bmdr_block_t*)dest,
+ xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
+ len, (xfs_bmdr_block_t*)dest,
XFS_DFORK_ASIZE(dip, mp));
break;
}
write_inode_buffer:
- if (ITEM_TYPE(item) == XFS_LI_INODE) {
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
- XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
- XFS_BUF_SET_FSPRIVATE(bp, mp);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
- xfs_bdwrite(mp, bp);
- } else {
- XFS_BUF_STALE(bp);
- error = xfs_bwrite(mp, bp);
- }
-
+ ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
+ bp->b_mount = mp;
+ XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
+ xfs_bdwrite(mp, bp);
error:
if (need_free)
- kmem_free(in_f, sizeof(*in_f));
+ kmem_free(in_f);
return XFS_ERROR(error);
}
return (0);
recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
- ASSERT(recddq);
+
+ if (item->ri_buf[1].i_addr == NULL) {
+ cmn_err(CE_ALERT,
+ "XFS: NULL dquot in %s.", __func__);
+ return XFS_ERROR(EIO);
+ }
+ if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
+ cmn_err(CE_ALERT,
+ "XFS: dquot too small (%d) in %s.",
+ item->ri_buf[1].i_len, __func__);
+ return XFS_ERROR(EIO);
+ }
+
/*
* This type of quotas was turned off, so ignore this record.
*/
memcpy(ddq, recddq, item->ri_buf[1].i_len);
ASSERT(dq_f->qlf_size == 2);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
- XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
- XFS_BUF_SET_FSPRIVATE(bp, mp);
+ ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
+ bp->b_mount = mp;
XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
xfs_bdwrite(mp, bp);
efip->efi_next_extent = efi_formatp->efi_nextents;
efip->efi_flags |= XFS_EFI_COMMITTED;
- spin_lock(&mp->m_ail_lock);
+ spin_lock(&log->l_ailp->xa_lock);
/*
- * xfs_trans_update_ail() drops the AIL lock.
+ * xfs_trans_ail_update() drops the AIL lock.
*/
- xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn);
+ xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn);
return 0;
}
xlog_recover_item_t *item,
int pass)
{
- xfs_mount_t *mp;
xfs_efd_log_format_t *efd_formatp;
xfs_efi_log_item_t *efip = NULL;
xfs_log_item_t *lip;
- int gen;
__uint64_t efi_id;
+ struct xfs_ail_cursor cur;
+ struct xfs_ail *ailp = log->l_ailp;
if (pass == XLOG_RECOVER_PASS1) {
return;
* Search for the efi with the id in the efd format structure
* in the AIL.
*/
- mp = log->l_mp;
- spin_lock(&mp->m_ail_lock);
- lip = xfs_trans_first_ail(mp, &gen);
+ spin_lock(&ailp->xa_lock);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
if (lip->li_type == XFS_LI_EFI) {
efip = (xfs_efi_log_item_t *)lip;
if (efip->efi_format.efi_id == efi_id) {
/*
- * xfs_trans_delete_ail() drops the
+ * xfs_trans_ail_delete() drops the
* AIL lock.
*/
- xfs_trans_delete_ail(mp, lip);
+ xfs_trans_ail_delete(ailp, lip);
xfs_efi_item_free(efip);
- return;
+ spin_lock(&ailp->xa_lock);
+ break;
}
}
- lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
}
- spin_unlock(&mp->m_ail_lock);
+ xfs_trans_ail_cursor_done(ailp, &cur);
+ spin_unlock(&ailp->xa_lock);
}
/*
int pass)
{
int error = 0;
- xlog_recover_item_t *item, *first_item;
+ xlog_recover_item_t *item;
- if ((error = xlog_recover_reorder_trans(trans)))
+ error = xlog_recover_reorder_trans(log, trans, pass);
+ if (error)
return error;
- first_item = item = trans->r_itemq;
- do {
- /*
- * we don't need to worry about the block number being
- * truncated in > 1 TB buffers because in user-land,
- * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so
- * the blknos will get through the user-mode buffer
- * cache properly. The only bad case is o32 kernels
- * where xfs_daddr_t is 32-bits but mount will warn us
- * off a > 1 TB filesystem before we get here.
- */
- if ((ITEM_TYPE(item) == XFS_LI_BUF)) {
- if ((error = xlog_recover_do_buffer_trans(log, item,
- pass)))
- break;
- } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) {
- if ((error = xlog_recover_do_inode_trans(log, item,
- pass)))
- break;
- } else if (ITEM_TYPE(item) == XFS_LI_EFI) {
- if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn,
- pass)))
- break;
- } else if (ITEM_TYPE(item) == XFS_LI_EFD) {
+
+ list_for_each_entry(item, &trans->r_itemq, ri_list) {
+ trace_xfs_log_recover_item_recover(log, trans, item, pass);
+ switch (ITEM_TYPE(item)) {
+ case XFS_LI_BUF:
+ error = xlog_recover_do_buffer_trans(log, item, pass);
+ break;
+ case XFS_LI_INODE:
+ error = xlog_recover_do_inode_trans(log, item, pass);
+ break;
+ case XFS_LI_EFI:
+ error = xlog_recover_do_efi_trans(log, item,
+ trans->r_lsn, pass);
+ break;
+ case XFS_LI_EFD:
xlog_recover_do_efd_trans(log, item, pass);
- } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) {
- if ((error = xlog_recover_do_dquot_trans(log, item,
- pass)))
- break;
- } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) {
- if ((error = xlog_recover_do_quotaoff_trans(log, item,
- pass)))
- break;
- } else {
- xlog_warn("XFS: xlog_recover_do_trans");
+ error = 0;
+ break;
+ case XFS_LI_DQUOT:
+ error = xlog_recover_do_dquot_trans(log, item, pass);
+ break;
+ case XFS_LI_QUOTAOFF:
+ error = xlog_recover_do_quotaoff_trans(log, item,
+ pass);
+ break;
+ default:
+ xlog_warn(
+ "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item));
ASSERT(0);
error = XFS_ERROR(EIO);
break;
}
- item = item->ri_next;
- } while (first_item != item);
- return error;
+ if (error)
+ return error;
+ }
+
+ return 0;
}
/*
xlog_recover_free_trans(
xlog_recover_t *trans)
{
- xlog_recover_item_t *first_item, *item, *free_item;
+ xlog_recover_item_t *item, *n;
int i;
- item = first_item = trans->r_itemq;
- do {
- free_item = item;
- item = item->ri_next;
- /* Free the regions in the item. */
- for (i = 0; i < free_item->ri_cnt; i++) {
- kmem_free(free_item->ri_buf[i].i_addr,
- free_item->ri_buf[i].i_len);
- }
+ list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
+ /* Free the regions in the item. */
+ list_del(&item->ri_list);
+ for (i = 0; i < item->ri_cnt; i++)
+ kmem_free(item->ri_buf[i].i_addr);
/* Free the item itself */
- kmem_free(free_item->ri_buf,
- (free_item->ri_total * sizeof(xfs_log_iovec_t)));
- kmem_free(free_item, sizeof(xlog_recover_item_t));
- } while (first_item != item);
+ kmem_free(item->ri_buf);
+ kmem_free(item);
+ }
/* Free the transaction recover structure */
- kmem_free(trans, sizeof(xlog_recover_t));
+ kmem_free(trans);
}
STATIC int
xlog_recover_commit_trans(
xlog_t *log,
- xlog_recover_t **q,
xlog_recover_t *trans,
int pass)
{
int error;
- if ((error = xlog_recover_unlink_tid(q, trans)))
- return error;
+ hlist_del(&trans->r_list);
if ((error = xlog_recover_do_trans(log, trans, pass)))
return error;
xlog_recover_free_trans(trans); /* no error */
STATIC int
xlog_recover_process_data(
xlog_t *log,
- xlog_recover_t *rhash[],
+ struct hlist_head rhash[],
xlog_rec_header_t *rhead,
xfs_caddr_t dp,
int pass)
}
tid = be32_to_cpu(ohead->oh_tid);
hash = XLOG_RHASH(tid);
- trans = xlog_recover_find_tid(rhash[hash], tid);
+ trans = xlog_recover_find_tid(&rhash[hash], tid);
if (trans == NULL) { /* not found; add new tid */
if (ohead->oh_flags & XLOG_START_TRANS)
xlog_recover_new_tid(&rhash[hash], tid,
switch (flags) {
case XLOG_COMMIT_TRANS:
error = xlog_recover_commit_trans(log,
- &rhash[hash], trans, pass);
+ trans, pass);
break;
case XLOG_UNMOUNT_TRANS:
error = xlog_recover_unmount_trans(trans);
break;
case XLOG_WAS_CONT_TRANS:
- error = xlog_recover_add_to_cont_trans(trans,
- dp, be32_to_cpu(ohead->oh_len));
+ error = xlog_recover_add_to_cont_trans(log,
+ trans, dp,
+ be32_to_cpu(ohead->oh_len));
break;
case XLOG_START_TRANS:
xlog_warn(
break;
case 0:
case XLOG_CONTINUE_TRANS:
- error = xlog_recover_add_to_trans(trans,
+ error = xlog_recover_add_to_trans(log, trans,
dp, be32_to_cpu(ohead->oh_len));
break;
default:
* Process an extent free intent item that was recovered from
* the log. We need to free the extents that it describes.
*/
-STATIC void
+STATIC int
xlog_recover_process_efi(
xfs_mount_t *mp,
xfs_efi_log_item_t *efip)
xfs_efd_log_item_t *efdp;
xfs_trans_t *tp;
int i;
+ int error = 0;
xfs_extent_t *extp;
xfs_fsblock_t startblock_fsb;
* free the memory associated with it.
*/
xfs_efi_release(efip, efip->efi_format.efi_nextents);
- return;
+ return XFS_ERROR(EIO);
}
}
tp = xfs_trans_alloc(mp, 0);
- xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
+ error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
+ if (error)
+ goto abort_error;
efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
extp = &(efip->efi_format.efi_extents[i]);
- xfs_free_extent(tp, extp->ext_start, extp->ext_len);
+ error = xfs_free_extent(tp, extp->ext_start, extp->ext_len);
+ if (error)
+ goto abort_error;
xfs_trans_log_efd_extent(tp, efdp, extp->ext_start,
extp->ext_len);
}
efip->efi_flags |= XFS_EFI_RECOVERED;
- xfs_trans_commit(tp, 0);
-}
-
-/*
- * Verify that once we've encountered something other than an EFI
- * in the AIL that there are no more EFIs in the AIL.
- */
-#if defined(DEBUG)
-STATIC void
-xlog_recover_check_ail(
- xfs_mount_t *mp,
- xfs_log_item_t *lip,
- int gen)
-{
- int orig_gen = gen;
+ error = xfs_trans_commit(tp, 0);
+ return error;
- do {
- ASSERT(lip->li_type != XFS_LI_EFI);
- lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
- /*
- * The check will be bogus if we restart from the
- * beginning of the AIL, so ASSERT that we don't.
- * We never should since we're holding the AIL lock
- * the entire time.
- */
- ASSERT(gen == orig_gen);
- } while (lip != NULL);
+abort_error:
+ xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+ return error;
}
-#endif /* DEBUG */
/*
* When this is called, all of the EFIs which did not have
* everything already in the AIL, we stop processing as soon as
* we see something other than an EFI in the AIL.
*/
-STATIC void
+STATIC int
xlog_recover_process_efis(
xlog_t *log)
{
xfs_log_item_t *lip;
xfs_efi_log_item_t *efip;
- int gen;
- xfs_mount_t *mp;
-
- mp = log->l_mp;
- spin_lock(&mp->m_ail_lock);
+ int error = 0;
+ struct xfs_ail_cursor cur;
+ struct xfs_ail *ailp;
- lip = xfs_trans_first_ail(mp, &gen);
+ ailp = log->l_ailp;
+ spin_lock(&ailp->xa_lock);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
/*
* We're done when we see something other than an EFI.
+ * There should be no EFIs left in the AIL now.
*/
if (lip->li_type != XFS_LI_EFI) {
- xlog_recover_check_ail(mp, lip, gen);
+#ifdef DEBUG
+ for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
+ ASSERT(lip->li_type != XFS_LI_EFI);
+#endif
break;
}
*/
efip = (xfs_efi_log_item_t *)lip;
if (efip->efi_flags & XFS_EFI_RECOVERED) {
- lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
continue;
}
- spin_unlock(&mp->m_ail_lock);
- xlog_recover_process_efi(mp, efip);
- spin_lock(&mp->m_ail_lock);
- lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
+ spin_unlock(&ailp->xa_lock);
+ error = xlog_recover_process_efi(log->l_mp, efip);
+ spin_lock(&ailp->xa_lock);
+ if (error)
+ goto out;
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
}
- spin_unlock(&mp->m_ail_lock);
+out:
+ xfs_trans_ail_cursor_done(ailp, &cur);
+ spin_unlock(&ailp->xa_lock);
+ return error;
}
/*
int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
- xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0);
+ error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp),
+ 0, 0, 0);
+ if (error)
+ goto out_abort;
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), 0, &agibp);
- if (error) {
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
- return;
- }
+ error = xfs_read_agi(mp, tp, agno, &agibp);
+ if (error)
+ goto out_abort;
agi = XFS_BUF_TO_AGI(agibp);
- if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) {
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
- return;
- }
-
agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
offset = offsetof(xfs_agi_t, agi_unlinked) +
(sizeof(xfs_agino_t) * bucket);
xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
- (void) xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp, 0);
+ if (error)
+ goto out_error;
+ return;
+
+out_abort:
+ xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+out_error:
+ xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: "
+ "failed to clear agi %d. Continuing.", agno);
+ return;
+}
+
+STATIC xfs_agino_t
+xlog_recover_process_one_iunlink(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_agino_t agino,
+ int bucket)
+{
+ struct xfs_buf *ibp;
+ struct xfs_dinode *dip;
+ struct xfs_inode *ip;
+ xfs_ino_t ino;
+ int error;
+
+ ino = XFS_AGINO_TO_INO(mp, agno, agino);
+ error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
+ if (error)
+ goto fail;
+
+ /*
+ * Get the on disk inode to find the next inode in the bucket.
+ */
+ error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK);
+ if (error)
+ goto fail_iput;
+
+ ASSERT(ip->i_d.di_nlink == 0);
+ ASSERT(ip->i_d.di_mode != 0);
+
+ /* setup for the next pass */
+ agino = be32_to_cpu(dip->di_next_unlinked);
+ xfs_buf_relse(ibp);
+
+ /*
+ * Prevent any DMAPI event from being sent when the reference on
+ * the inode is dropped.
+ */
+ ip->i_d.di_dmevmask = 0;
+
+ IRELE(ip);
+ return agino;
+
+ fail_iput:
+ IRELE(ip);
+ fail:
+ /*
+ * We can't read in the inode this bucket points to, or this inode
+ * is messed up. Just ditch this bucket of inodes. We will lose
+ * some inodes and space, but at least we won't hang.
+ *
+ * Call xlog_recover_clear_agi_bucket() to perform a transaction to
+ * clear the inode pointer in the bucket.
+ */
+ xlog_recover_clear_agi_bucket(mp, agno, bucket);
+ return NULLAGINO;
}
/*
* freeing of the inode and its removal from the list must be
* atomic.
*/
-void
+STATIC void
xlog_recover_process_iunlinks(
xlog_t *log)
{
xfs_agnumber_t agno;
xfs_agi_t *agi;
xfs_buf_t *agibp;
- xfs_buf_t *ibp;
- xfs_dinode_t *dip;
- xfs_inode_t *ip;
xfs_agino_t agino;
- xfs_ino_t ino;
int bucket;
int error;
uint mp_dmevmask;
/*
* Find the agi for this ag.
*/
- agibp = xfs_buf_read(mp->m_ddev_targp,
- XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), 0);
- if (XFS_BUF_ISERROR(agibp)) {
- xfs_ioerror_alert("xlog_recover_process_iunlinks(#1)",
- log->l_mp, agibp,
- XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)));
+ error = xfs_read_agi(mp, NULL, agno, &agibp);
+ if (error) {
+ /*
+ * AGI is b0rked. Don't process it.
+ *
+ * We should probably mark the filesystem as corrupt
+ * after we've recovered all the ag's we can....
+ */
+ continue;
}
agi = XFS_BUF_TO_AGI(agibp);
- ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agi->agi_magicnum));
for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
-
agino = be32_to_cpu(agi->agi_unlinked[bucket]);
while (agino != NULLAGINO) {
-
/*
* Release the agi buffer so that it can
* be acquired in the normal course of the
*/
xfs_buf_relse(agibp);
- ino = XFS_AGINO_TO_INO(mp, agno, agino);
- error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
- ASSERT(error || (ip != NULL));
-
- if (!error) {
- /*
- * Get the on disk inode to find the
- * next inode in the bucket.
- */
- error = xfs_itobp(mp, NULL, ip, &dip,
- &ibp, 0, 0,
- XFS_BUF_LOCK);
- ASSERT(error || (dip != NULL));
- }
-
- if (!error) {
- ASSERT(ip->i_d.di_nlink == 0);
-
- /* setup for the next pass */
- agino = be32_to_cpu(
- dip->di_next_unlinked);
- xfs_buf_relse(ibp);
- /*
- * Prevent any DMAPI event from
- * being sent when the
- * reference on the inode is
- * dropped.
- */
- ip->i_d.di_dmevmask = 0;
-
- /*
- * If this is a new inode, handle
- * it specially. Otherwise,
- * just drop our reference to the
- * inode. If there are no
- * other references, this will
- * send the inode to
- * xfs_inactive() which will
- * truncate the file and free
- * the inode.
- */
- if (ip->i_d.di_mode == 0)
- xfs_iput_new(ip, 0);
- else
- IRELE(ip);
- } else {
- /*
- * We can't read in the inode
- * this bucket points to, or
- * this inode is messed up. Just
- * ditch this bucket of inodes. We
- * will lose some inodes and space,
- * but at least we won't hang. Call
- * xlog_recover_clear_agi_bucket()
- * to perform a transaction to clear
- * the inode pointer in the bucket.
- */
- xlog_recover_clear_agi_bucket(mp, agno,
- bucket);
-
- agino = NULLAGINO;
- }
+ agino = xlog_recover_process_one_iunlink(mp,
+ agno, agino, bucket);
/*
* Reacquire the agibuffer and continue around
- * the loop.
+ * the loop. This should never fail as we know
+ * the buffer was good earlier on.
*/
- agibp = xfs_buf_read(mp->m_ddev_targp,
- XFS_AG_DADDR(mp, agno,
- XFS_AGI_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), 0);
- if (XFS_BUF_ISERROR(agibp)) {
- xfs_ioerror_alert(
- "xlog_recover_process_iunlinks(#2)",
- log->l_mp, agibp,
- XFS_AG_DADDR(mp, agno,
- XFS_AGI_DADDR(mp)));
- }
+ error = xfs_read_agi(mp, NULL, agno, &agibp);
+ ASSERT(error == 0);
agi = XFS_BUF_TO_AGI(agibp);
- ASSERT(XFS_AGI_MAGIC == be32_to_cpu(
- agi->agi_magicnum));
}
}
int size = iclog->ic_offset + roundoff;
__be32 cycle_lsn;
xfs_caddr_t dp;
- xlog_in_core_2_t *xhdr;
xlog_pack_data_checksum(log, iclog, size);
}
if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
- xhdr = (xlog_in_core_2_t *)&iclog->ic_header;
+ xlog_in_core_2_t *xhdr = iclog->ic_data;
+
for ( ; i < BTOBB(size); i++) {
j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
}
}
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-STATIC void
-xlog_unpack_data_checksum(
- xlog_rec_header_t *rhead,
- xfs_caddr_t dp,
- xlog_t *log)
-{
- __be32 *up = (__be32 *)dp;
- uint chksum = 0;
- int i;
-
- /* divide length by 4 to get # words */
- for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) {
- chksum ^= be32_to_cpu(*up);
- up++;
- }
- if (chksum != be32_to_cpu(rhead->h_chksum)) {
- if (rhead->h_chksum ||
- ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
- cmn_err(CE_DEBUG,
- "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
- be32_to_cpu(rhead->h_chksum), chksum);
- cmn_err(CE_DEBUG,
-"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
- if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
- cmn_err(CE_DEBUG,
- "XFS: LogR this is a LogV2 filesystem\n");
- }
- log->l_flags |= XLOG_CHKSUM_MISMATCH;
- }
- }
-}
-#else
-#define xlog_unpack_data_checksum(rhead, dp, log)
-#endif
-
STATIC void
xlog_unpack_data(
xlog_rec_header_t *rhead,
xlog_t *log)
{
int i, j, k;
- xlog_in_core_2_t *xhdr;
for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
}
if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
- xhdr = (xlog_in_core_2_t *)rhead;
+ xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
dp += BBSIZE;
}
}
-
- xlog_unpack_data_checksum(rhead, dp, log);
}
STATIC int
{
xlog_rec_header_t *rhead;
xfs_daddr_t blk_no;
- xfs_caddr_t bufaddr, offset;
+ xfs_caddr_t offset;
xfs_buf_t *hbp, *dbp;
int error = 0, h_size;
int bblks, split_bblks;
int hblks, split_hblks, wrapped_hblks;
- xlog_recover_t *rhash[XLOG_RHASH_SIZE];
+ struct hlist_head rhash[XLOG_RHASH_SIZE];
ASSERT(head_blk != tail_blk);
hbp = xlog_get_bp(log, 1);
if (!hbp)
return ENOMEM;
- if ((error = xlog_bread(log, tail_blk, 1, hbp)))
+
+ error = xlog_bread(log, tail_blk, 1, hbp, &offset);
+ if (error)
goto bread_err1;
- offset = xlog_align(log, tail_blk, 1, hbp);
+
rhead = (xlog_rec_header_t *)offset;
error = xlog_valid_rec_header(log, rhead, tail_blk);
if (error)
hblks = 1;
}
} else {
- ASSERT(log->l_sectbb_log == 0);
+ ASSERT(log->l_sectBBsize == 1);
hblks = 1;
hbp = xlog_get_bp(log, 1);
h_size = XLOG_BIG_RECORD_BSIZE;
memset(rhash, 0, sizeof(rhash));
if (tail_blk <= head_blk) {
for (blk_no = tail_blk; blk_no < head_blk; ) {
- if ((error = xlog_bread(log, blk_no, hblks, hbp)))
+ error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+ if (error)
goto bread_err2;
- offset = xlog_align(log, blk_no, hblks, hbp);
+
rhead = (xlog_rec_header_t *)offset;
error = xlog_valid_rec_header(log, rhead, blk_no);
if (error)
/* blocks in data section */
bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
- error = xlog_bread(log, blk_no + hblks, bblks, dbp);
+ error = xlog_bread(log, blk_no + hblks, bblks, dbp,
+ &offset);
if (error)
goto bread_err2;
- offset = xlog_align(log, blk_no + hblks, bblks, dbp);
+
xlog_unpack_data(rhead, offset, log);
if ((error = xlog_recover_process_data(log,
rhash, rhead, offset, pass)))
/*
* Check for header wrapping around physical end-of-log
*/
- offset = NULL;
+ offset = XFS_BUF_PTR(hbp);
split_hblks = 0;
wrapped_hblks = 0;
if (blk_no + hblks <= log->l_logBBsize) {
/* Read header in one read */
- error = xlog_bread(log, blk_no, hblks, hbp);
+ error = xlog_bread(log, blk_no, hblks, hbp,
+ &offset);
if (error)
goto bread_err2;
- offset = xlog_align(log, blk_no, hblks, hbp);
} else {
/* This LR is split across physical log end */
if (blk_no != log->l_logBBsize) {
ASSERT(blk_no <= INT_MAX);
split_hblks = log->l_logBBsize - (int)blk_no;
ASSERT(split_hblks > 0);
- if ((error = xlog_bread(log, blk_no,
- split_hblks, hbp)))
+ error = xlog_bread(log, blk_no,
+ split_hblks, hbp,
+ &offset);
+ if (error)
goto bread_err2;
- offset = xlog_align(log, blk_no,
- split_hblks, hbp);
}
+
/*
* Note: this black magic still works with
* large sector sizes (non-512) only because:
* _first_, then the log start (LR header end)
* - order is important.
*/
- bufaddr = XFS_BUF_PTR(hbp);
- XFS_BUF_SET_PTR(hbp,
- bufaddr + BBTOB(split_hblks),
- BBTOB(hblks - split_hblks));
wrapped_hblks = hblks - split_hblks;
- error = xlog_bread(log, 0, wrapped_hblks, hbp);
+ error = XFS_BUF_SET_PTR(hbp,
+ offset + BBTOB(split_hblks),
+ BBTOB(hblks - split_hblks));
+ if (error)
+ goto bread_err2;
+
+ error = xlog_bread_noalign(log, 0,
+ wrapped_hblks, hbp);
+ if (error)
+ goto bread_err2;
+
+ error = XFS_BUF_SET_PTR(hbp, offset,
+ BBTOB(hblks));
if (error)
goto bread_err2;
- XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks));
- if (!offset)
- offset = xlog_align(log, 0,
- wrapped_hblks, hbp);
}
rhead = (xlog_rec_header_t *)offset;
error = xlog_valid_rec_header(log, rhead,
/* Read in data for log record */
if (blk_no + bblks <= log->l_logBBsize) {
- error = xlog_bread(log, blk_no, bblks, dbp);
+ error = xlog_bread(log, blk_no, bblks, dbp,
+ &offset);
if (error)
goto bread_err2;
- offset = xlog_align(log, blk_no, bblks, dbp);
} else {
/* This log record is split across the
* physical end of log */
- offset = NULL;
+ offset = XFS_BUF_PTR(dbp);
split_bblks = 0;
if (blk_no != log->l_logBBsize) {
/* some data is before the physical
split_bblks =
log->l_logBBsize - (int)blk_no;
ASSERT(split_bblks > 0);
- if ((error = xlog_bread(log, blk_no,
- split_bblks, dbp)))
+ error = xlog_bread(log, blk_no,
+ split_bblks, dbp,
+ &offset);
+ if (error)
goto bread_err2;
- offset = xlog_align(log, blk_no,
- split_bblks, dbp);
}
+
/*
* Note: this black magic still works with
* large sector sizes (non-512) only because:
* _first_, then the log start (LR header end)
* - order is important.
*/
- bufaddr = XFS_BUF_PTR(dbp);
- XFS_BUF_SET_PTR(dbp,
- bufaddr + BBTOB(split_bblks),
+ error = XFS_BUF_SET_PTR(dbp,
+ offset + BBTOB(split_bblks),
BBTOB(bblks - split_bblks));
- if ((error = xlog_bread(log, wrapped_hblks,
- bblks - split_bblks, dbp)))
+ if (error)
+ goto bread_err2;
+
+ error = xlog_bread_noalign(log, wrapped_hblks,
+ bblks - split_bblks,
+ dbp);
+ if (error)
+ goto bread_err2;
+
+ error = XFS_BUF_SET_PTR(dbp, offset, h_size);
+ if (error)
goto bread_err2;
- XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
- if (!offset)
- offset = xlog_align(log, wrapped_hblks,
- bblks - split_bblks, dbp);
}
xlog_unpack_data(rhead, offset, log);
if ((error = xlog_recover_process_data(log, rhash,
/* read first part of physical log */
while (blk_no < head_blk) {
- if ((error = xlog_bread(log, blk_no, hblks, hbp)))
+ error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+ if (error)
goto bread_err2;
- offset = xlog_align(log, blk_no, hblks, hbp);
+
rhead = (xlog_rec_header_t *)offset;
error = xlog_valid_rec_header(log, rhead, blk_no);
if (error)
goto bread_err2;
+
bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
- if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
+ error = xlog_bread(log, blk_no+hblks, bblks, dbp,
+ &offset);
+ if (error)
goto bread_err2;
- offset = xlog_align(log, blk_no+hblks, bblks, dbp);
+
xlog_unpack_data(rhead, offset, log);
if ((error = xlog_recover_process_data(log, rhash,
rhead, offset, pass)))
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS1);
if (error != 0) {
- kmem_free(log->l_buf_cancel_table,
- XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
+ kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
return error;
}
}
#endif /* DEBUG */
- kmem_free(log->l_buf_cancel_table,
- XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
+ kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
return error;
XFS_BUF_READ(bp);
XFS_BUF_UNASYNC(bp);
xfsbdstrat(log->l_mp, bp);
- if ((error = xfs_iowait(bp))) {
+ error = xfs_iowait(bp);
+ if (error) {
xfs_ioerror_alert("xlog_do_recover",
log->l_mp, bp, XFS_BUF_ADDR(bp));
ASSERT(0);
*/
int
xlog_recover_finish(
- xlog_t *log,
- int mfsi_flags)
+ xlog_t *log)
{
/*
* Now we're ready to do the transactions needed for the
* rather than accepting new requests.
*/
if (log->l_flags & XLOG_RECOVERY_NEEDED) {
- xlog_recover_process_efis(log);
+ int error;
+ error = xlog_recover_process_efis(log);
+ if (error) {
+ cmn_err(CE_ALERT,
+ "Failed to recover EFIs on filesystem: %s",
+ log->l_mp->m_fsname);
+ return error;
+ }
/*
* Sync the log to get all the EFIs out of the AIL.
* This isn't absolutely necessary, but it helps in
* case the unlink transactions would have problems
* pushing the EFIs out of the way.
*/
- xfs_log_force(log->l_mp, (xfs_lsn_t)0,
- (XFS_LOG_FORCE | XFS_LOG_SYNC));
+ xfs_log_force(log->l_mp, XFS_LOG_SYNC);
- if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) {
- xlog_recover_process_iunlinks(log);
- }
+ xlog_recover_process_iunlinks(log);
xlog_recover_check_summary(log);
{
xfs_mount_t *mp;
xfs_agf_t *agfp;
- xfs_agi_t *agip;
xfs_buf_t *agfbp;
xfs_buf_t *agibp;
- xfs_daddr_t agfdaddr;
- xfs_daddr_t agidaddr;
- xfs_buf_t *sbbp;
-#ifdef XFS_LOUD_RECOVERY
- xfs_sb_t *sbp;
-#endif
xfs_agnumber_t agno;
__uint64_t freeblks;
__uint64_t itotal;
__uint64_t ifree;
+ int error;
mp = log->l_mp;
itotal = 0LL;
ifree = 0LL;
for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
- agfdaddr = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp));
- agfbp = xfs_buf_read(mp->m_ddev_targp, agfdaddr,
- XFS_FSS_TO_BB(mp, 1), 0);
- if (XFS_BUF_ISERROR(agfbp)) {
- xfs_ioerror_alert("xlog_recover_check_summary(agf)",
- mp, agfbp, agfdaddr);
- }
- agfp = XFS_BUF_TO_AGF(agfbp);
- ASSERT(XFS_AGF_MAGIC == be32_to_cpu(agfp->agf_magicnum));
- ASSERT(XFS_AGF_GOOD_VERSION(be32_to_cpu(agfp->agf_versionnum)));
- ASSERT(be32_to_cpu(agfp->agf_seqno) == agno);
-
- freeblks += be32_to_cpu(agfp->agf_freeblks) +
- be32_to_cpu(agfp->agf_flcount);
- xfs_buf_relse(agfbp);
-
- agidaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
- agibp = xfs_buf_read(mp->m_ddev_targp, agidaddr,
- XFS_FSS_TO_BB(mp, 1), 0);
- if (XFS_BUF_ISERROR(agibp)) {
- xfs_ioerror_alert("xlog_recover_check_summary(agi)",
- mp, agibp, agidaddr);
+ error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
+ if (error) {
+ xfs_fs_cmn_err(CE_ALERT, mp,
+ "xlog_recover_check_summary(agf)"
+ "agf read failed agno %d error %d",
+ agno, error);
+ } else {
+ agfp = XFS_BUF_TO_AGF(agfbp);
+ freeblks += be32_to_cpu(agfp->agf_freeblks) +
+ be32_to_cpu(agfp->agf_flcount);
+ xfs_buf_relse(agfbp);
}
- agip = XFS_BUF_TO_AGI(agibp);
- ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agip->agi_magicnum));
- ASSERT(XFS_AGI_GOOD_VERSION(be32_to_cpu(agip->agi_versionnum)));
- ASSERT(be32_to_cpu(agip->agi_seqno) == agno);
- itotal += be32_to_cpu(agip->agi_count);
- ifree += be32_to_cpu(agip->agi_freecount);
- xfs_buf_relse(agibp);
- }
+ error = xfs_read_agi(mp, NULL, agno, &agibp);
+ if (!error) {
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp);
- sbbp = xfs_getsb(mp, 0);
-#ifdef XFS_LOUD_RECOVERY
- sbp = &mp->m_sb;
- xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp));
- cmn_err(CE_NOTE,
- "xlog_recover_check_summary: sb_icount %Lu itotal %Lu",
- sbp->sb_icount, itotal);
- cmn_err(CE_NOTE,
- "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu",
- sbp->sb_ifree, ifree);
- cmn_err(CE_NOTE,
- "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu",
- sbp->sb_fdblocks, freeblks);
-#if 0
- /*
- * This is turned off until I account for the allocation
- * btree blocks which live in free space.
- */
- ASSERT(sbp->sb_icount == itotal);
- ASSERT(sbp->sb_ifree == ifree);
- ASSERT(sbp->sb_fdblocks == freeblks);
-#endif
-#endif
- xfs_buf_relse(sbbp);
+ itotal += be32_to_cpu(agi->agi_count);
+ ifree += be32_to_cpu(agi->agi_freecount);
+ xfs_buf_relse(agibp);
+ }
+ }
}
#endif /* DEBUG */