Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6

[safe/jmp/linux-2.6] / fs / ocfs2 / buffer_head_io.c
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c

index 7e947c6..f9d5d3f 100644 (file)
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -25,7 +25,6 @@
  
  #include <linux/fs.h>
  #include <linux/types.h>
-#include <linux/slab.h>
  #include <linux/highmem.h>
  
  #include <cluster/masklog.h>
@@ -39,13 +38,25 @@
  
  #include "buffer_head_io.h"
  
+/*
+ * Bits on bh->b_state used by ocfs2.
+ *
+ * These MUST be after the JBD2 bits.  Hence, we use BH_JBDPrivateStart.
+ */
+enum ocfs2_state_bits {
+       BH_NeedsValidate = BH_JBDPrivateStart,
+};
+
+/* Expand the magic b_state functions */
+BUFFER_FNS(NeedsValidate, needs_validate);
+
  int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
-                     struct inode *inode)
+                     struct ocfs2_caching_info *ci)
  {
         int ret = 0;
  
-       mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n",
-                  (unsigned long long)bh->b_blocknr, inode);
+       mlog_entry("(bh->b_blocknr = %llu, ci=%p)\n",
+                  (unsigned long long)bh->b_blocknr, ci);
  
         BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
         BUG_ON(buffer_jbd(bh));
@@ -58,7 +69,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
                 goto out;
         }
  
-       mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
+       ocfs2_metadata_cache_io_lock(ci);
  
         lock_buffer(bh);
         set_buffer_uptodate(bh);
@@ -73,7 +84,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
         wait_on_buffer(bh);
  
         if (buffer_uptodate(bh)) {
-               ocfs2_set_buffer_uptodate(inode, bh);
+               ocfs2_set_buffer_uptodate(ci, bh);
         } else {
                 /* We don't need to remove the clustered uptodate
                  * information for this bh as it's not marked locally
@@ -82,7 +93,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
                 put_bh(bh);
         }
  
-       mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+       ocfs2_metadata_cache_io_unlock(ci);
  out:
         mlog_exit(ret);
         return ret;
@@ -112,7 +123,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                 bh = bhs[i];
  
                 if (buffer_jbd(bh)) {
-                       mlog(ML_ERROR,
+                       mlog(ML_BH_IO,
                              "trying to sync read a jbd "
                              "managed bh (blocknr = %llu), skipping\n",
                              (unsigned long long)bh->b_blocknr);
@@ -147,15 +158,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
         for (i = nr; i > 0; i--) {
                 bh = bhs[i - 1];
  
-               if (buffer_jbd(bh)) {
-                       mlog(ML_ERROR,
-                            "the journal got the buffer while it was "
-                            "locked for io! (blocknr = %llu)\n",
-                            (unsigned long long)bh->b_blocknr);
-                       BUG();
-               }
+               /* No need to wait on the buffer if it's managed by JBD. */
+               if (!buffer_jbd(bh))
+                       wait_on_buffer(bh);
  
-               wait_on_buffer(bh);
                 if (!buffer_uptodate(bh)) {
                         /* Status won't be cleared from here on out,
                          * so we can safely record this and loop back
@@ -170,17 +176,20 @@ bail:
         return status;
  }
  
-int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
-                     struct buffer_head *bhs[], int flags)
+int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
+                     struct buffer_head *bhs[], int flags,
+                     int (*validate)(struct super_block *sb,
+                                     struct buffer_head *bh))
  {
         int status = 0;
         int i, ignore_cache = 0;
         struct buffer_head *bh;
+       struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
  
-       mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
-                  inode, (unsigned long long)block, nr, flags);
+       mlog_entry("(ci=%p, block=(%llu), nr=(%d), flags=%d)\n",
+                  ci, (unsigned long long)block, nr, flags);
  
-       BUG_ON(!inode);
+       BUG_ON(!ci);
         BUG_ON((flags & OCFS2_BH_READAHEAD) &&
                (flags & OCFS2_BH_IGNORE_CACHE));
  
@@ -203,12 +212,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                 goto bail;
         }
  
-       mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
+       ocfs2_metadata_cache_io_lock(ci);
         for (i = 0 ; i < nr ; i++) {
                 if (bhs[i] == NULL) {
-                       bhs[i] = sb_getblk(inode->i_sb, block++);
+                       bhs[i] = sb_getblk(sb, block++);
                         if (bhs[i] == NULL) {
-                               mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+                               ocfs2_metadata_cache_io_unlock(ci);
                                 status = -EIO;
                                 mlog_errno(status);
                                 goto bail;
@@ -241,18 +250,16 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                  *    before our is-it-in-flight check.
                  */
  
-               if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) {
+               if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
                         mlog(ML_UPTODATE,
-                            "bh (%llu), inode %llu not uptodate\n",
+                            "bh (%llu), owner %llu not uptodate\n",
                              (unsigned long long)bh->b_blocknr,
-                            (unsigned long long)OCFS2_I(inode)->ip_blkno);
+                            (unsigned long long)ocfs2_metadata_cache_owner(ci));
                         /* We're using ignore_cache here to say
                          * "go to disk" */
                         ignore_cache = 1;
                 }
  
-               /* XXX: Can we ever get this and *not* have the cached
-                * flag set? */
                 if (buffer_jbd(bh)) {
                         if (ignore_cache)
                                 mlog(ML_BH_IO, "trying to sync read a jbd "
@@ -276,7 +283,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                          * previously submitted request than we are
                          * done here. */
                         if ((flags & OCFS2_BH_READAHEAD)
-                           && ocfs2_buffer_read_ahead(inode, bh))
+                           && ocfs2_buffer_read_ahead(ci, bh))
                                 continue;
  
                         lock_buffer(bh);
@@ -298,13 +305,15 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                          * buffer lock. */
                         if (!(flags & OCFS2_BH_IGNORE_CACHE)
                             && !(flags & OCFS2_BH_READAHEAD)
-                           && ocfs2_buffer_uptodate(inode, bh)) {
+                           && ocfs2_buffer_uptodate(ci, bh)) {
                                 unlock_buffer(bh);
                                 continue;
                         }
  
                         clear_buffer_uptodate(bh);
                         get_bh(bh); /* for end_buffer_read_sync() */
+                       if (validate)
+                               set_buffer_needs_validate(bh);
                         bh->b_end_io = end_buffer_read_sync;
                         submit_bh(READ, bh);
                         continue;
@@ -318,7 +327,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
  
                 if (!(flags & OCFS2_BH_READAHEAD)) {
                         /* We know this can't have changed as we hold the
-                        * inode sem. Avoid doing any work on the bh if the
+                        * owner sem. Avoid doing any work on the bh if the
                          * journal has it. */
                         if (!buffer_jbd(bh))
                                 wait_on_buffer(bh);
@@ -335,16 +344,30 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                                 bhs[i] = NULL;
                                 continue;
                         }
+
+                       if (buffer_needs_validate(bh)) {
+                               /* We never set NeedsValidate if the
+                                * buffer was held by the journal, so
+                                * that better not have changed */
+                               BUG_ON(buffer_jbd(bh));
+                               clear_buffer_needs_validate(bh);
+                               status = validate(sb, bh);
+                               if (status) {
+                                       put_bh(bh);
+                                       bhs[i] = NULL;
+                                       continue;
+                               }
+                       }
                 }
  
                 /* Always set the buffer in the cache, even if it was
                  * a forced read, or read-ahead which hasn't yet
                  * completed. */
-               ocfs2_set_buffer_uptodate(inode, bh);
+               ocfs2_set_buffer_uptodate(ci, bh);
         }
-       mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+       ocfs2_metadata_cache_io_unlock(ci);
  
-       mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
+       mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
              (unsigned long long)block, nr,
              ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
              flags);
@@ -376,13 +399,14 @@ static void ocfs2_check_super_or_backup(struct super_block *sb,
  
  /*
   * Write super block and backups doesn't need to collaborate with journal,
- * so we don't need to lock ip_io_mutex and inode doesn't need to bea passed
+ * so we don't need to lock ip_io_mutex and ci doesn't need to be passed
   * into this function.
   */
  int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
                                 struct buffer_head *bh)
  {
         int ret = 0;
+       struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
  
         mlog_entry_void();
  
@@ -402,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
  
         get_bh(bh); /* for end_buffer_write_sync() */
         bh->b_end_io = end_buffer_write_sync;
+       ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
         submit_bh(WRITE, bh);
  
         wait_on_buffer(bh);