ext4: Make the extent validity check more paranoid

[safe/jmp/linux-2.6] / fs / ext4 / resize.c
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c

index 3e0f5d0..546c7dd 100644 (file)
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,11 +11,10 @@
  
  #define EXT4FS_DEBUG
  
-#include <linux/ext4_jbd2.h>
-
  #include <linux/errno.h>
  #include <linux/slab.h>
  
+#include "ext4_jbd2.h"
  #include "group.h"
  
  #define outside(b, first, last)        ((b) < (first) || (b) >= (last))
@@ -51,7 +50,7 @@ static int verify_group_input(struct super_block *sb,
         ext4_get_group_no_and_offset(sb, start, NULL, &offset);
         if (group != sbi->s_groups_count)
                 ext4_warning(sb, __func__,
-                            "Cannot add at group %u (only %lu groups)",
+                            "Cannot add at group %u (only %u groups)",
                              input->group, sbi->s_groups_count);
         else if (offset != 0)
                         ext4_warning(sb, __func__, "Last group not full");
@@ -74,7 +73,7 @@ static int verify_group_input(struct super_block *sb,
                              "Inode bitmap not in group (block %llu)",
                              (unsigned long long)input->inode_bitmap);
         else if (outside(input->inode_table, start, end) ||
-                outside(itend - 1, start, end))
+                outside(itend - 1, start, end))
                 ext4_warning(sb, __func__,
                              "Inode table not in group (blocks %llu-%llu)",
                              (unsigned long long)input->inode_table, itend - 1);
@@ -105,7 +104,7 @@ static int verify_group_input(struct super_block *sb,
                              (unsigned long long)input->inode_bitmap,
                              start, metaend - 1);
         else if (inside(input->inode_table, start, metaend) ||
-                inside(itend - 1, start, metaend))
+                inside(itend - 1, start, metaend))
                 ext4_warning(sb, __func__,
                              "Inode table (%llu-%llu) overlaps"
                              "GDT table (%llu-%llu)",
@@ -150,7 +149,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
  {
         int err;
  
-       if (handle->h_buffer_credits >= thresh)
+       if (ext4_handle_has_enough_credits(handle, thresh))
                 return 0;
  
         err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
@@ -159,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
         if (err) {
                 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
                         return err;
-               if ((err = ext4_journal_get_write_access(handle, bh)))
+               if ((err = ext4_journal_get_write_access(handle, bh)))
                         return err;
-        }
+       }
  
         return 0;
  }
@@ -233,7 +232,7 @@ static int setup_new_group_blocks(struct super_block *sb,
                 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
                 set_buffer_uptodate(gdb);
                 unlock_buffer(gdb);
-               ext4_journal_dirty_metadata(handle, gdb);
+               ext4_handle_dirty_metadata(handle, NULL, gdb);
                 ext4_set_bit(bit, bh->b_data);
                 brelse(gdb);
         }
@@ -252,7 +251,7 @@ static int setup_new_group_blocks(struct super_block *sb,
                         err = PTR_ERR(bh);
                         goto exit_bh;
                 }
-               ext4_journal_dirty_metadata(handle, gdb);
+               ext4_handle_dirty_metadata(handle, NULL, gdb);
                 ext4_set_bit(bit, bh->b_data);
                 brelse(gdb);
         }
@@ -277,7 +276,7 @@ static int setup_new_group_blocks(struct super_block *sb,
                         err = PTR_ERR(it);
                         goto exit_bh;
                 }
-               ext4_journal_dirty_metadata(handle, it);
+               ext4_handle_dirty_metadata(handle, NULL, it);
                 brelse(it);
                 ext4_set_bit(bit, bh->b_data);
         }
@@ -285,11 +284,9 @@ static int setup_new_group_blocks(struct super_block *sb,
         if ((err = extend_or_restart_transaction(handle, 2, bh)))
                 goto exit_bh;
  
-       mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
-                       bh->b_data);
-       ext4_journal_dirty_metadata(handle, bh);
+       mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
+       ext4_handle_dirty_metadata(handle, NULL, bh);
         brelse(bh);
-
         /* Mark unused entries in inode bitmap used */
         ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
                    input->inode_bitmap, input->inode_bitmap - start);
@@ -298,9 +295,9 @@ static int setup_new_group_blocks(struct super_block *sb,
                 goto exit_journal;
         }
  
-       mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
+       mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
                         bh->b_data);
-       ext4_journal_dirty_metadata(handle, bh);
+       ext4_handle_dirty_metadata(handle, NULL, bh);
  exit_bh:
         brelse(bh);
  
@@ -419,9 +416,9 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
  
         /*
          * If we are not using the primary superblock/GDT copy don't resize,
-        * because the user tools have no way of handling this.  Probably a
-        * bad time to do it anyways.
-        */
+         * because the user tools have no way of handling this.  Probably a
+         * bad time to do it anyways.
+         */
         if (EXT4_SB(sb)->s_sbh->b_blocknr !=
             le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
                 ext4_warning(sb, __func__,
@@ -469,7 +466,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
                 goto exit_dindj;
  
         n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
-                       GFP_KERNEL);
+                       GFP_NOFS);
         if (!n_group_desc) {
                 err = -ENOMEM;
                 ext4_warning(sb, __func__,
@@ -487,12 +484,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
          * reserved inode, and will become GDT blocks (primary and backup).
          */
         data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
-       ext4_journal_dirty_metadata(handle, dind);
+       ext4_handle_dirty_metadata(handle, NULL, dind);
         brelse(dind);
         inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
         ext4_mark_iloc_dirty(handle, inode, &iloc);
         memset((*primary)->b_data, 0, sb->s_blocksize);
-       ext4_journal_dirty_metadata(handle, *primary);
+       ext4_handle_dirty_metadata(handle, NULL, *primary);
  
         o_group_desc = EXT4_SB(sb)->s_group_desc;
         memcpy(n_group_desc, o_group_desc,
@@ -503,19 +500,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
         kfree(o_group_desc);
  
         le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
-       ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+       ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
  
         return 0;
  
  exit_inode:
-       //ext4_journal_release_buffer(handle, iloc.bh);
+       /* ext4_journal_release_buffer(handle, iloc.bh); */
         brelse(iloc.bh);
  exit_dindj:
-       //ext4_journal_release_buffer(handle, dind);
+       /* ext4_journal_release_buffer(handle, dind); */
  exit_primary:
-       //ext4_journal_release_buffer(handle, *primary);
+       /* ext4_journal_release_buffer(handle, *primary); */
  exit_sbh:
-       //ext4_journal_release_buffer(handle, *primary);
+       /* ext4_journal_release_buffer(handle, *primary); */
  exit_dind:
         brelse(dind);
  exit_bh:
@@ -552,7 +549,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
         int res, i;
         int err;
  
-       primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL);
+       primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
         if (!primary)
                 return -ENOMEM;
  
@@ -564,7 +561,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
         }
  
         blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
-       data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count;
+       data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
+                                        EXT4_ADDR_PER_BLOCK(sb));
         end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
  
         /* Get each reserved primary GDT block and verify it holds backups */
@@ -618,7 +616,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
                        primary[i]->b_blocknr, gdbackups,
                        blk + primary[i]->b_blocknr); */
                 data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
-               err2 = ext4_journal_dirty_metadata(handle, primary[i]);
+               err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]);
                 if (!err)
                         err = err2;
         }
@@ -676,7 +674,8 @@ static void update_backups(struct super_block *sb,
                 struct buffer_head *bh;
  
                 /* Out of journal space, and can't get more - abort - so sad */
-               if (handle->h_buffer_credits == 0 &&
+               if (ext4_handle_valid(handle) &&
+                   handle->h_buffer_credits == 0 &&
                     ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
                     (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
                         break;
@@ -696,7 +695,7 @@ static void update_backups(struct super_block *sb,
                         memset(bh->b_data + size, 0, rest);
                 set_buffer_uptodate(bh);
                 unlock_buffer(bh);
-               ext4_journal_dirty_metadata(handle, bh);
+               ext4_handle_dirty_metadata(handle, NULL, bh);
                 brelse(bh);
         }
         if ((err2 = ext4_journal_stop(handle)) && !err)
@@ -715,7 +714,7 @@ static void update_backups(struct super_block *sb,
  exit_err:
         if (err) {
                 ext4_warning(sb, __func__,
-                            "can't update backup for group %lu (err %d), "
+                            "can't update backup for group %u (err %d), "
                              "forcing fsck on next reboot", group, err);
                 sbi->s_mount_state &= ~EXT4_VALID_FS;
                 sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
@@ -747,6 +746,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         struct inode *inode = NULL;
         handle_t *handle;
         int gdb_off, gdb_num;
+       int num_grp_locked = 0;
         int err, err2;
  
         gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
@@ -761,19 +761,20 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
  
         if (ext4_blocks_count(es) + input->blocks_count <
             ext4_blocks_count(es)) {
-               ext4_warning(sb, __func__, "blocks_count overflow\n");
+               ext4_warning(sb, __func__, "blocks_count overflow");
                 return -EINVAL;
         }
  
         if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
             le32_to_cpu(es->s_inodes_count)) {
-               ext4_warning(sb, __func__, "inodes_count overflow\n");
+               ext4_warning(sb, __func__, "inodes_count overflow");
                 return -EINVAL;
         }
  
         if (reserved_gdb || gdb_off == 0) {
                 if (!EXT4_HAS_COMPAT_FEATURE(sb,
-                                            EXT4_FEATURE_COMPAT_RESIZE_INODE)){
+                                            EXT4_FEATURE_COMPAT_RESIZE_INODE)
+                   || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
                         ext4_warning(sb, __func__,
                                      "No reserved GDT blocks, can't resize");
                         return -EPERM;
@@ -786,6 +787,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
                 }
         }
  
+
         if ((err = verify_group_input(sb, input)))
                 goto exit_put;
  
@@ -818,12 +820,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
                 goto exit_journal;
  
-       /*
-        * We will only either add reserved group blocks to a backup group
-        * or remove reserved blocks for the first group in a new group block.
-        * Doing both would be mean more complex code, and sane people don't
-        * use non-sparse filesystems anymore.  This is already checked above.
-        */
+        /*
+         * We will only either add reserved group blocks to a backup group
+         * or remove reserved blocks for the first group in a new group block.
+         * Doing both would mean more complex code, and sane people don't
+         * use non-sparse filesystems anymore.  This is already checked above.
+         */
         if (gdb_off) {
                 primary = sbi->s_group_desc[gdb_num];
                 if ((err = ext4_journal_get_write_access(handle, primary)))
@@ -835,36 +837,50 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         } else if ((err = add_new_gdb(handle, inode, input, &primary)))
                 goto exit_journal;
  
-       /*
-        * OK, now we've set up the new group.  Time to make it active.
-        *
-        * Current kernels don't lock all allocations via lock_super(),
-        * so we have to be safe wrt. concurrent accesses the group
-        * data.  So we need to be careful to set all of the relevant
-        * group descriptor data etc. *before* we enable the group.
-        *
-        * The key field here is sbi->s_groups_count: as long as
-        * that retains its old value, nobody is going to access the new
-        * group.
-        *
-        * So first we update all the descriptor metadata for the new
-        * group; then we update the total disk blocks count; then we
-        * update the groups count to enable the group; then finally we
-        * update the free space counts so that the system can start
-        * using the new disk blocks.
-        */
-
+        /*
+         * OK, now we've set up the new group.  Time to make it active.
+         *
+         * Current kernels don't lock all allocations via lock_super(),
+         * so we have to be safe wrt. concurrent accesses to the group
+         * data.  So we need to be careful to set all of the relevant
+         * group descriptor data etc. *before* we enable the group.
+         *
+         * The key field here is sbi->s_groups_count: as long as
+         * that retains its old value, nobody is going to access the new
+         * group.
+         *
+         * So first we update all the descriptor metadata for the new
+         * group; then we update the total disk blocks count; then we
+         * update the groups count to enable the group; then finally we
+         * update the free space counts so that the system can start
+         * using the new disk blocks.
+         */
+
+       num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group);
         /* Update group descriptor block for new group */
-       gdp = (struct ext4_group_desc *)primary->b_data + gdb_off;
+       gdp = (struct ext4_group_desc *)((char *)primary->b_data +
+                                        gdb_off * EXT4_DESC_SIZE(sb));
  
+       memset(gdp, 0, EXT4_DESC_SIZE(sb));
         ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
         ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
         ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
-       gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
-       gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+       ext4_free_blks_set(sb, gdp, input->free_blocks_count);
+       ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
+       gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
         gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
  
         /*
+        * We can allocate memory for mb_alloc based on the new group
+        * descriptor
+        */
+       err = ext4_mb_add_groupinfo(sb, input->group, gdp);
+       if (err) {
+               ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
+               goto exit_journal;
+       }
+
+       /*
          * Make the new blocks and inodes valid next.  We do this before
          * increasing the group count so that once the group is enabled,
          * all of its blocks and inodes are already valid.
@@ -904,8 +920,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
  
         /* Update the global fs size fields */
         sbi->s_groups_count++;
+       ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
  
-       ext4_journal_dirty_metadata(handle, primary);
+       ext4_handle_dirty_metadata(handle, NULL, primary);
  
         /* Update the reserved block counts only once the new group is
          * active. */
@@ -918,7 +935,16 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         percpu_counter_add(&sbi->s_freeinodes_counter,
                            EXT4_INODES_PER_GROUP(sb));
  
-       ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+               ext4_group_t flex_group;
+               flex_group = ext4_flex_group(sbi, input->group);
+               atomic_add(input->free_blocks_count,
+                          &sbi->s_flex_groups[flex_group].free_blocks);
+               atomic_add(EXT4_INODES_PER_GROUP(sb),
+                          &sbi->s_flex_groups[flex_group].free_inodes);
+       }
+
+       ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
         sb->s_dirt = 1;
  
  exit_journal:
@@ -936,7 +962,8 @@ exit_put:
         return err;
  } /* ext4_group_add */
  
-/* Extend the filesystem to the new number of blocks specified.  This entry
+/*
+ * Extend the filesystem to the new number of blocks specified.  This entry
   * point is only used to extend the current filesystem to the end of the last
   * existing group.  It can be accessed via ioctl, or by "remount,resize=<size>"
   * for emergencies (because it has no dependencies on reserved blocks).
@@ -952,10 +979,10 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
         ext4_group_t o_groups_count;
         ext4_grpblk_t last;
         ext4_grpblk_t add;
-       struct buffer_head * bh;
+       struct buffer_head *bh;
         handle_t *handle;
         int err;
-       unsigned long freed_blocks;
+       ext4_group_t group;
  
         /* We don't need to worry about locking wrt other resizers just
          * yet: we're going to revalidate es->s_blocks_count after
@@ -975,8 +1002,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                         " too large to resize to %llu blocks safely\n",
                         sb->s_id, n_blocks_count);
                 if (sizeof(sector_t) < 8)
-                       ext4_warning(sb, __func__,
-                       "CONFIG_LBD not enabled\n");
+                       ext4_warning(sb, __func__, "CONFIG_LBD not enabled");
                 return -EINVAL;
         }
  
@@ -987,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
         }
  
         /* Handle the remaining blocks in the last group only. */
-       ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
+       ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
  
         if (last == 0) {
                 ext4_warning(sb, __func__,
@@ -1012,7 +1038,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                              o_blocks_count + add, add);
  
         /* See if the device is actually as big as what was requested */
-       bh = sb_bread(sb, o_blocks_count + add -1);
+       bh = sb_bread(sb, o_blocks_count + add - 1);
         if (!bh) {
                 ext4_warning(sb, __func__,
                              "can't read last block, resize aborted");
@@ -1049,16 +1075,18 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                 goto exit_put;
         }
         ext4_blocks_count_set(es, o_blocks_count + add);
-       ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+       ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
         sb->s_dirt = 1;
         unlock_super(sb);
         ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
                    o_blocks_count + add);
-       ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
+       /* We add the blocks to the bitmap and set the group need init bit */
+       ext4_add_groupblocks(handle, sb, o_blocks_count, add);
         ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
                    o_blocks_count + add);
         if ((err = ext4_journal_stop(handle)))
                 goto exit_put;
+
         if (test_opt(sb, DEBUG))
                 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
                        ext4_blocks_count(es));