ocfs2: Validate metadata only when it's read from disk.
authorJoel Becker <joel.becker@oracle.com>
Thu, 13 Nov 2008 22:49:19 +0000 (14:49 -0800)
committerMark Fasheh <mfasheh@suse.com>
Mon, 5 Jan 2009 16:36:53 +0000 (08:36 -0800)
Add an optional validation hook to ocfs2_read_blocks().  Now the
validation function is only called when a block was actually read off of
disk.  It is not called when the buffer was in cache.

We add a buffer state bit BH_NeedsValidate to flag these buffers.  It
must always be one higher than the last JBD2 buffer state bit.

The dinode, dirblock, extent_block, and xattr_block validators are
lifted to this scheme directly.  The group_descriptor validator needs to
be split into two pieces.  The first part only needs the gd buffer and
is passed to ocfs2_read_block().  The second part requires the dinode as
well, and is called every time.  It's only 3 compares, so it's tiny.
This also allows us to clean up the non-fatal gd check used by resize.c.
It now has no magic argument.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
fs/ocfs2/alloc.c
fs/ocfs2/buffer_head_io.c
fs/ocfs2/buffer_head_io.h
fs/ocfs2/dir.c
fs/ocfs2/inode.c
fs/ocfs2/resize.c
fs/ocfs2/slot_map.c
fs/ocfs2/suballoc.c
fs/ocfs2/suballoc.h
fs/ocfs2/xattr.c

index f430cc6..e823a27 100644 (file)
@@ -684,6 +684,9 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
        struct ocfs2_extent_block *eb =
                (struct ocfs2_extent_block *)bh->b_data;
 
+       mlog(0, "Validating extent block %llu\n",
+            (unsigned long long)bh->b_blocknr);
+
        if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
                ocfs2_error(sb,
                            "Extent block #%llu has bad signature %.*s",
@@ -719,21 +722,13 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
        int rc;
        struct buffer_head *tmp = *bh;
 
-       rc = ocfs2_read_block(inode, eb_blkno, &tmp);
-       if (rc)
-               goto out;
-
-       rc = ocfs2_validate_extent_block(inode->i_sb, tmp);
-       if (rc) {
-               brelse(tmp);
-               goto out;
-       }
+       rc = ocfs2_read_block(inode, eb_blkno, &tmp,
+                             ocfs2_validate_extent_block);
 
        /* If ocfs2_read_block() got us a new bh, pass it up. */
-       if (!*bh)
+       if (!rc && !*bh)
                *bh = tmp;
 
-out:
        return rc;
 }
 
index 3a178ec..0e9eed0 100644 (file)
 
 #include "buffer_head_io.h"
 
+/*
+ * Bits on bh->b_state used by ocfs2.
+ *
+ * These MUST be after the JBD2 bits.  Currently BH_Unshadow is the last
+ * JBD2 bit.
+ */
+enum ocfs2_state_bits {
+       BH_NeedsValidate = BH_Unshadow + 1,
+};
+
+/* Expand the magic b_state functions */
+BUFFER_FNS(NeedsValidate, needs_validate);
+
 int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
                      struct inode *inode)
 {
@@ -166,7 +179,9 @@ bail:
 }
 
 int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
-                     struct buffer_head *bhs[], int flags)
+                     struct buffer_head *bhs[], int flags,
+                     int (*validate)(struct super_block *sb,
+                                     struct buffer_head *bh))
 {
        int status = 0;
        int i, ignore_cache = 0;
@@ -298,6 +313,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 
                        clear_buffer_uptodate(bh);
                        get_bh(bh); /* for end_buffer_read_sync() */
+                       if (validate)
+                               set_buffer_needs_validate(bh);
                        bh->b_end_io = end_buffer_read_sync;
                        submit_bh(READ, bh);
                        continue;
@@ -328,6 +345,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
                                bhs[i] = NULL;
                                continue;
                        }
+
+                       if (buffer_needs_validate(bh)) {
+                               /* We never set NeedsValidate if the
+                                * buffer was held by the journal, so
+                                * that better not have changed */
+                               BUG_ON(buffer_jbd(bh));
+                               clear_buffer_needs_validate(bh);
+                               status = validate(inode->i_sb, bh);
+                               if (status) {
+                                       put_bh(bh);
+                                       bhs[i] = NULL;
+                                       continue;
+                               }
+                       }
                }
 
                /* Always set the buffer in the cache, even if it was
index 75e1dcb..c75d682 100644 (file)
 void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
                             int uptodate);
 
-static inline int ocfs2_read_block(struct inode               *inode,
-                                  u64                  off,
-                                  struct buffer_head **bh);
-
 int ocfs2_write_block(struct ocfs2_super          *osb,
                      struct buffer_head  *bh,
                      struct inode        *inode);
-int ocfs2_read_blocks(struct inode       *inode,
-                     u64                  block,
-                     int                  nr,
-                     struct buffer_head  *bhs[],
-                     int                  flags);
 int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                           unsigned int nr, struct buffer_head *bhs[]);
 
+/*
+ * If not NULL, validate() will be called on a buffer that is freshly
+ * read from disk.  It will not be called if the buffer was in cache.
+ * Note that if validate() is being used for this buffer, it needs to
+ * be set even for a READAHEAD call, as it marks the buffer for later
+ * validation.
+ */
+int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
+                     struct buffer_head *bhs[], int flags,
+                     int (*validate)(struct super_block *sb,
+                                     struct buffer_head *bh));
+
 int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
                                struct buffer_head *bh);
 
@@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
 #define OCFS2_BH_READAHEAD         8
 
 static inline int ocfs2_read_block(struct inode *inode, u64 off,
-                                  struct buffer_head **bh)
+                                  struct buffer_head **bh,
+                                  int (*validate)(struct super_block *sb,
+                                                  struct buffer_head *bh))
 {
        int status = 0;
 
@@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
                goto bail;
        }
 
-       status = ocfs2_read_blocks(inode, off, 1, bh, 0);
+       status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
 
 bail:
        return status;
index c2f3fd9..7e863d4 100644 (file)
@@ -214,6 +214,8 @@ static int ocfs2_validate_dir_block(struct super_block *sb,
         * Nothing yet.  We don't validate dirents here, that's handled
         * in-place when the code walks them.
         */
+       mlog(0, "Validating dirblock %llu\n",
+            (unsigned long long)bh->b_blocknr);
 
        return 0;
 }
@@ -255,20 +257,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
                goto out;
        }
 
-       rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags);
+       rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags,
+                              ocfs2_validate_dir_block);
        if (rc) {
                mlog_errno(rc);
                goto out;
        }
 
-       if (!(flags & OCFS2_BH_READAHEAD)) {
-               rc = ocfs2_validate_dir_block(inode->i_sb, tmp);
-               if (rc) {
-                       brelse(tmp);
-                       goto out;
-               }
-       }
-
        /* If ocfs2_read_blocks() got us a new bh, pass it up.  */
        if (!*bh)
                *bh = tmp;
index 9eb701b..ec3497b 100644 (file)
@@ -1255,6 +1255,9 @@ int ocfs2_validate_inode_block(struct super_block *sb,
        int rc = -EINVAL;
        struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
 
+       mlog(0, "Validating dinode %llu\n",
+            (unsigned long long)bh->b_blocknr);
+
        BUG_ON(!buffer_uptodate(bh));
 
        if (!OCFS2_IS_VALID_DINODE(di)) {
@@ -1300,23 +1303,12 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
        struct buffer_head *tmp = *bh;
 
        rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
-                              flags);
-       if (rc)
-               goto out;
-
-       if (!(flags & OCFS2_BH_READAHEAD)) {
-               rc = ocfs2_validate_inode_block(inode->i_sb, tmp);
-               if (rc) {
-                       brelse(tmp);
-                       goto out;
-               }
-       }
+                              flags, ocfs2_validate_inode_block);
 
        /* If ocfs2_read_blocks() got us a new bh, pass it up. */
-       if (!*bh)
+       if (!rc && !*bh)
                *bh = tmp;
 
-out:
        return rc;
 }
 
index 252baff..867de3e 100644 (file)
@@ -394,7 +394,7 @@ static int ocfs2_check_new_group(struct inode *inode,
                (struct ocfs2_group_desc *)group_bh->b_data;
        u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
 
-       ret = ocfs2_validate_group_descriptor(inode->i_sb, di, group_bh, 1);
+       ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh);
        if (ret)
                goto out;
 
index bdda2d8..40661e7 100644 (file)
@@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
         * this is not true, the read of -1 (UINT64_MAX) will fail.
         */
        ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
-                               OCFS2_BH_IGNORE_CACHE);
+                               OCFS2_BH_IGNORE_CACHE, NULL);
        if (ret == 0) {
                spin_lock(&osb->osb_lock);
                ocfs2_update_slot_info(si);
@@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 
                bh = NULL;  /* Acquire a fresh bh */
                status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
-                                          OCFS2_BH_IGNORE_CACHE);
+                                          OCFS2_BH_IGNORE_CACHE, NULL);
                if (status < 0) {
                        mlog_errno(status);
                        goto bail;
index 766a00b..226fe21 100644 (file)
@@ -145,14 +145,6 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
        return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
 }
 
-int ocfs2_validate_group_descriptor(struct super_block *sb,
-                                   struct ocfs2_dinode *di,
-                                   struct buffer_head *bh,
-                                   int clean_error)
-{
-       unsigned int max_bits;
-       struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
-
 #define do_error(fmt, ...)                                             \
        do{                                                             \
                if (clean_error)                                        \
@@ -161,6 +153,12 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
                        ocfs2_error(sb, fmt, ##__VA_ARGS__);            \
        } while (0)
 
+static int ocfs2_validate_gd_self(struct super_block *sb,
+                                 struct buffer_head *bh,
+                                 int clean_error)
+{
+       struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
        if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
                do_error("Group descriptor #%llu has bad signature %.*s",
                         (unsigned long long)bh->b_blocknr, 7,
@@ -184,6 +182,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
                return -EINVAL;
        }
 
+       if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+               do_error("Group descriptor #%llu has bit count %u but "
+                        "claims that %u are free",
+                        (unsigned long long)bh->b_blocknr,
+                        le16_to_cpu(gd->bg_bits),
+                        le16_to_cpu(gd->bg_free_bits_count));
+               return -EINVAL;
+       }
+
+       if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+               do_error("Group descriptor #%llu has bit count %u but "
+                        "max bitmap bits of %u",
+                        (unsigned long long)bh->b_blocknr,
+                        le16_to_cpu(gd->bg_bits),
+                        8 * le16_to_cpu(gd->bg_size));
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int ocfs2_validate_gd_parent(struct super_block *sb,
+                                   struct ocfs2_dinode *di,
+                                   struct buffer_head *bh,
+                                   int clean_error)
+{
+       unsigned int max_bits;
+       struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
        if (di->i_blkno != gd->bg_parent_dinode) {
                do_error("Group descriptor #%llu has bad parent "
                         "pointer (%llu, expected %llu)",
@@ -209,26 +236,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
                return -EINVAL;
        }
 
-       if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
-               do_error("Group descriptor #%llu has bit count %u but "
-                        "claims that %u are free",
-                        (unsigned long long)bh->b_blocknr,
-                        le16_to_cpu(gd->bg_bits),
-                        le16_to_cpu(gd->bg_free_bits_count));
-               return -EINVAL;
-       }
+       return 0;
+}
 
-       if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
-               do_error("Group descriptor #%llu has bit count %u but "
-                        "max bitmap bits of %u",
-                        (unsigned long long)bh->b_blocknr,
-                        le16_to_cpu(gd->bg_bits),
-                        8 * le16_to_cpu(gd->bg_size));
-               return -EINVAL;
-       }
 #undef do_error
 
-       return 0;
+/*
+ * This version only prints errors.  It does not fail the filesystem, and
+ * exists only for resize.
+ */
+int ocfs2_check_group_descriptor(struct super_block *sb,
+                                struct ocfs2_dinode *di,
+                                struct buffer_head *bh)
+{
+       int rc;
+
+       rc = ocfs2_validate_gd_self(sb, bh, 1);
+       if (!rc)
+               rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
+
+       return rc;
+}
+
+static int ocfs2_validate_group_descriptor(struct super_block *sb,
+                                          struct buffer_head *bh)
+{
+       mlog(0, "Validating group descriptor %llu\n",
+            (unsigned long long)bh->b_blocknr);
+
+       return ocfs2_validate_gd_self(sb, bh, 0);
 }
 
 int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
@@ -237,11 +273,12 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
        int rc;
        struct buffer_head *tmp = *bh;
 
-       rc = ocfs2_read_block(inode, gd_blkno, &tmp);
+       rc = ocfs2_read_block(inode, gd_blkno, &tmp,
+                             ocfs2_validate_group_descriptor);
        if (rc)
                goto out;
 
-       rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0);
+       rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
        if (rc) {
                brelse(tmp);
                goto out;
index 43de4fd..e3c13c7 100644 (file)
@@ -165,16 +165,15 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
 u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
 
 /*
- * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it
+ * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it
  * finds a problem.  A caller that wants to check a group descriptor
- * without going readonly passes a nonzero clean_error.  This is only
- * resize, really.  Everyone else should be using
- * ocfs2_read_group_descriptor().
+ * without going readonly should read the block with ocfs2_read_block[s]()
+ * and then checking it with this function.  This is only resize, really.
+ * Everyone else should be using ocfs2_read_group_descriptor().
  */
-int ocfs2_validate_group_descriptor(struct super_block *sb,
-                                   struct ocfs2_dinode *di,
-                                   struct buffer_head *bh,
-                                   int clean_error);
+int ocfs2_check_group_descriptor(struct super_block *sb,
+                                struct ocfs2_dinode *di,
+                                struct buffer_head *bh);
 /*
  * Read a group descriptor block into *bh.  If *bh is NULL, a bh will be
  * allocated.  This is a cached read.  The descriptor will be validated with
index ef4aa54..8af29b3 100644 (file)
@@ -266,7 +266,8 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
        int rc;
 
        rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
-                              bucket->bu_blocks, bucket->bu_bhs, 0);
+                              bucket->bu_blocks, bucket->bu_bhs, 0,
+                              NULL);
        if (rc)
                ocfs2_xattr_bucket_relse(bucket);
        return rc;
@@ -359,12 +360,8 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
        int rc;
        struct buffer_head *tmp = *bh;
 
-       rc = ocfs2_read_block(inode, xb_blkno, &tmp);
-       if (!rc) {
-               rc = ocfs2_validate_xattr_block(inode->i_sb, tmp);
-               if (rc)
-                       brelse(tmp);
-       }
+       rc = ocfs2_read_block(inode, xb_blkno, &tmp,
+                             ocfs2_validate_xattr_block);
 
        /* If ocfs2_read_block() got us a new bh, pass it up. */
        if (!rc && !*bh)
@@ -925,7 +922,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
                blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
                /* Copy ocfs2_xattr_value */
                for (i = 0; i < num_clusters * bpc; i++, blkno++) {
-                       ret = ocfs2_read_block(inode, blkno, &bh);
+                       ret = ocfs2_read_block(inode, blkno, &bh, NULL);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
@@ -1174,7 +1171,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
                blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 
                for (i = 0; i < num_clusters * bpc; i++, blkno++) {
-                       ret = ocfs2_read_block(inode, blkno, &bh);
+                       ret = ocfs2_read_block(inode, blkno, &bh, NULL);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
@@ -2206,7 +2203,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
                base = xis->base;
                credits += OCFS2_INODE_UPDATE_CREDITS;
        } else {
-               int i, block_off;
+               int i, block_off = 0;
                xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
                xe = xbs->here;
                name_offset = le16_to_cpu(xe->xe_name_offset);
@@ -2840,6 +2837,7 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
                        break;
                }
 
+
                xe_name = bucket_block(bucket, block_off) + new_offset;
                if (!memcmp(name, xe_name, name_len)) {
                        *xe_index = i;
@@ -3598,7 +3596,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
                        goto out;
                }
 
-               ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
+               ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL);
                if (ret < 0) {
                        mlog_errno(ret);
                        brelse(new_bh);
@@ -3990,7 +3988,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
        ocfs2_journal_dirty(handle, first_bh);
 
        /* update the new bucket header. */
-       ret = ocfs2_read_block(inode, to_blk_start, &bh);
+       ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL);
        if (ret < 0) {
                mlog_errno(ret);
                goto out;
@@ -4337,7 +4335,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
                goto out;
        }
 
-       ret = ocfs2_read_block(inode, p_blkno, &first_bh);
+       ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL);
        if (ret) {
                mlog_errno(ret);
                goto out;
@@ -4635,7 +4633,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
        BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
        value_blk += header_bh->b_blocknr;
 
-       ret = ocfs2_read_block(inode, value_blk, &value_bh);
+       ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL);
        if (ret) {
                mlog_errno(ret);
                goto out;