Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
[safe/jmp/linux-2.6] / fs / ocfs2 / buffer_head_io.c
index 7e947c6..f9d5d3f 100644 (file)
@@ -25,7 +25,6 @@
 
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/highmem.h>
 
 #include <cluster/masklog.h>
 
 #include "buffer_head_io.h"
 
+/*
+ * Bits on bh->b_state used by ocfs2.
+ *
+ * These MUST be after the JBD2 bits.  Hence, we use BH_JBDPrivateStart.
+ */
+enum ocfs2_state_bits {
+       BH_NeedsValidate = BH_JBDPrivateStart,
+};
+
+/* Generate set_/clear_buffer_needs_validate() and buffer_needs_validate() */
+BUFFER_FNS(NeedsValidate, needs_validate);
+
 int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
-                     struct inode *inode)
+                     struct ocfs2_caching_info *ci)
 {
        int ret = 0;
 
-       mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n",
-                  (unsigned long long)bh->b_blocknr, inode);
+       mlog_entry("(bh->b_blocknr = %llu, ci=%p)\n",
+                  (unsigned long long)bh->b_blocknr, ci);
 
        BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
        BUG_ON(buffer_jbd(bh));
@@ -58,7 +69,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
                goto out;
        }
 
-       mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
+       ocfs2_metadata_cache_io_lock(ci);
 
        lock_buffer(bh);
        set_buffer_uptodate(bh);
@@ -73,7 +84,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
        wait_on_buffer(bh);
 
        if (buffer_uptodate(bh)) {
-               ocfs2_set_buffer_uptodate(inode, bh);
+               ocfs2_set_buffer_uptodate(ci, bh);
        } else {
                /* We don't need to remove the clustered uptodate
                 * information for this bh as it's not marked locally
@@ -82,7 +93,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
                put_bh(bh);
        }
 
-       mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+       ocfs2_metadata_cache_io_unlock(ci);
 out:
        mlog_exit(ret);
        return ret;
@@ -112,7 +123,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                bh = bhs[i];
 
                if (buffer_jbd(bh)) {
-                       mlog(ML_ERROR,
+                       mlog(ML_BH_IO,
                             "trying to sync read a jbd "
                             "managed bh (blocknr = %llu), skipping\n",
                             (unsigned long long)bh->b_blocknr);
@@ -147,15 +158,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
        for (i = nr; i > 0; i--) {
                bh = bhs[i - 1];
 
-               if (buffer_jbd(bh)) {
-                       mlog(ML_ERROR,
-                            "the journal got the buffer while it was "
-                            "locked for io! (blocknr = %llu)\n",
-                            (unsigned long long)bh->b_blocknr);
-                       BUG();
-               }
+               /* No need to wait on the buffer if it's managed by JBD. */
+               if (!buffer_jbd(bh))
+                       wait_on_buffer(bh);
 
-               wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
                        /* Status won't be cleared from here on out,
                         * so we can safely record this and loop back
@@ -170,17 +176,20 @@ bail:
        return status;
 }
 
-int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
-                     struct buffer_head *bhs[], int flags)
+int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
+                     struct buffer_head *bhs[], int flags,
+                     int (*validate)(struct super_block *sb,
+                                     struct buffer_head *bh))
 {
        int status = 0;
        int i, ignore_cache = 0;
        struct buffer_head *bh;
+       struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
 
-       mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
-                  inode, (unsigned long long)block, nr, flags);
+       mlog_entry("(ci=%p, block=(%llu), nr=(%d), flags=%d)\n",
+                  ci, (unsigned long long)block, nr, flags);
 
-       BUG_ON(!inode);
+       BUG_ON(!ci);
        BUG_ON((flags & OCFS2_BH_READAHEAD) &&
               (flags & OCFS2_BH_IGNORE_CACHE));
 
@@ -203,12 +212,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                goto bail;
        }
 
-       mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
+       ocfs2_metadata_cache_io_lock(ci);
        for (i = 0 ; i < nr ; i++) {
                if (bhs[i] == NULL) {
-                       bhs[i] = sb_getblk(inode->i_sb, block++);
+                       bhs[i] = sb_getblk(sb, block++);
                        if (bhs[i] == NULL) {
-                               mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+                               ocfs2_metadata_cache_io_unlock(ci);
                                status = -EIO;
                                mlog_errno(status);
                                goto bail;
@@ -241,18 +250,16 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                 *    before our is-it-in-flight check.
                 */
 
-               if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) {
+               if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
                        mlog(ML_UPTODATE,
-                            "bh (%llu), inode %llu not uptodate\n",
+                            "bh (%llu), owner %llu not uptodate\n",
                             (unsigned long long)bh->b_blocknr,
-                            (unsigned long long)OCFS2_I(inode)->ip_blkno);
+                            (unsigned long long)ocfs2_metadata_cache_owner(ci));
                        /* We're using ignore_cache here to say
                         * "go to disk" */
                        ignore_cache = 1;
                }
 
-               /* XXX: Can we ever get this and *not* have the cached
-                * flag set? */
                if (buffer_jbd(bh)) {
                        if (ignore_cache)
                                mlog(ML_BH_IO, "trying to sync read a jbd "
@@ -276,7 +283,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                         * previously submitted request than we are
                         * done here. */
                        if ((flags & OCFS2_BH_READAHEAD)
-                           && ocfs2_buffer_read_ahead(inode, bh))
+                           && ocfs2_buffer_read_ahead(ci, bh))
                                continue;
 
                        lock_buffer(bh);
@@ -298,13 +305,15 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                         * buffer lock. */
                        if (!(flags & OCFS2_BH_IGNORE_CACHE)
                            && !(flags & OCFS2_BH_READAHEAD)
-                           && ocfs2_buffer_uptodate(inode, bh)) {
+                           && ocfs2_buffer_uptodate(ci, bh)) {
                                unlock_buffer(bh);
                                continue;
                        }
 
                        clear_buffer_uptodate(bh);
                        get_bh(bh); /* for end_buffer_read_sync() */
+                       if (validate)
+                               set_buffer_needs_validate(bh);
                        bh->b_end_io = end_buffer_read_sync;
                        submit_bh(READ, bh);
                        continue;
@@ -318,7 +327,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 
                if (!(flags & OCFS2_BH_READAHEAD)) {
                        /* We know this can't have changed as we hold the
-                        * inode sem. Avoid doing any work on the bh if the
+                        * owner sem. Avoid doing any work on the bh if the
                         * journal has it. */
                        if (!buffer_jbd(bh))
                                wait_on_buffer(bh);
@@ -335,16 +344,30 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                                bhs[i] = NULL;
                                continue;
                        }
+
+                       if (buffer_needs_validate(bh)) {
+                               /* We never set NeedsValidate if the
+                                * buffer was held by the journal, so
+                                * that better not have changed */
+                               BUG_ON(buffer_jbd(bh));
+                               clear_buffer_needs_validate(bh);
+                               status = validate(sb, bh);
+                               if (status) {
+                                       put_bh(bh);
+                                       bhs[i] = NULL;
+                                       continue;
+                               }
+                       }
                }
 
                /* Always set the buffer in the cache, even if it was
                 * a forced read, or read-ahead which hasn't yet
                 * completed. */
-               ocfs2_set_buffer_uptodate(inode, bh);
+               ocfs2_set_buffer_uptodate(ci, bh);
        }
-       mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+       ocfs2_metadata_cache_io_unlock(ci);
 
-       mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
+       mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
             (unsigned long long)block, nr,
             ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
             flags);
@@ -376,13 +399,14 @@ static void ocfs2_check_super_or_backup(struct super_block *sb,
 
 /*
  * Write super block and backups doesn't need to collaborate with journal,
- * so we don't need to lock ip_io_mutex and inode doesn't need to bea passed
+ * so we don't need to lock ip_io_mutex and ci doesn't need to be passed
  * into this function.
  */
 int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
                                struct buffer_head *bh)
 {
        int ret = 0;
+       struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
 
        mlog_entry_void();
 
@@ -402,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
 
        get_bh(bh); /* for end_buffer_write_sync() */
        bh->b_end_io = end_buffer_write_sync;
+       ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
        submit_bh(WRITE, bh);
 
        wait_on_buffer(bh);