ext4: Make the extent validity check more paranoid
[safe/jmp/linux-2.6] / fs / ext4 / resize.c
index 3e0f5d0..546c7dd 100644 (file)
 
 #define EXT4FS_DEBUG
 
-#include <linux/ext4_jbd2.h>
-
 #include <linux/errno.h>
 #include <linux/slab.h>
 
+#include "ext4_jbd2.h"
 #include "group.h"
 
 #define outside(b, first, last)        ((b) < (first) || (b) >= (last))
@@ -51,7 +50,7 @@ static int verify_group_input(struct super_block *sb,
        ext4_get_group_no_and_offset(sb, start, NULL, &offset);
        if (group != sbi->s_groups_count)
                ext4_warning(sb, __func__,
-                            "Cannot add at group %u (only %lu groups)",
+                            "Cannot add at group %u (only %u groups)",
                             input->group, sbi->s_groups_count);
        else if (offset != 0)
                        ext4_warning(sb, __func__, "Last group not full");
@@ -74,7 +73,7 @@ static int verify_group_input(struct super_block *sb,
                             "Inode bitmap not in group (block %llu)",
                             (unsigned long long)input->inode_bitmap);
        else if (outside(input->inode_table, start, end) ||
-                outside(itend - 1, start, end))
+                outside(itend - 1, start, end))
                ext4_warning(sb, __func__,
                             "Inode table not in group (blocks %llu-%llu)",
                             (unsigned long long)input->inode_table, itend - 1);
@@ -105,7 +104,7 @@ static int verify_group_input(struct super_block *sb,
                             (unsigned long long)input->inode_bitmap,
                             start, metaend - 1);
        else if (inside(input->inode_table, start, metaend) ||
-                inside(itend - 1, start, metaend))
+                inside(itend - 1, start, metaend))
                ext4_warning(sb, __func__,
                             "Inode table (%llu-%llu) overlaps"
                             "GDT table (%llu-%llu)",
@@ -150,7 +149,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
 {
        int err;
 
-       if (handle->h_buffer_credits >= thresh)
+       if (ext4_handle_has_enough_credits(handle, thresh))
                return 0;
 
        err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
@@ -159,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
        if (err) {
                if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
                        return err;
-               if ((err = ext4_journal_get_write_access(handle, bh)))
+               if ((err = ext4_journal_get_write_access(handle, bh)))
                        return err;
-        }
+       }
 
        return 0;
 }
@@ -233,7 +232,7 @@ static int setup_new_group_blocks(struct super_block *sb,
                memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
                set_buffer_uptodate(gdb);
                unlock_buffer(gdb);
-               ext4_journal_dirty_metadata(handle, gdb);
+               ext4_handle_dirty_metadata(handle, NULL, gdb);
                ext4_set_bit(bit, bh->b_data);
                brelse(gdb);
        }
@@ -252,7 +251,7 @@ static int setup_new_group_blocks(struct super_block *sb,
                        err = PTR_ERR(bh);
                        goto exit_bh;
                }
-               ext4_journal_dirty_metadata(handle, gdb);
+               ext4_handle_dirty_metadata(handle, NULL, gdb);
                ext4_set_bit(bit, bh->b_data);
                brelse(gdb);
        }
@@ -277,7 +276,7 @@ static int setup_new_group_blocks(struct super_block *sb,
                        err = PTR_ERR(it);
                        goto exit_bh;
                }
-               ext4_journal_dirty_metadata(handle, it);
+               ext4_handle_dirty_metadata(handle, NULL, it);
                brelse(it);
                ext4_set_bit(bit, bh->b_data);
        }
@@ -285,11 +284,9 @@ static int setup_new_group_blocks(struct super_block *sb,
        if ((err = extend_or_restart_transaction(handle, 2, bh)))
                goto exit_bh;
 
-       mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
-                       bh->b_data);
-       ext4_journal_dirty_metadata(handle, bh);
+       mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
+       ext4_handle_dirty_metadata(handle, NULL, bh);
        brelse(bh);
-
        /* Mark unused entries in inode bitmap used */
        ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
                   input->inode_bitmap, input->inode_bitmap - start);
@@ -298,9 +295,9 @@ static int setup_new_group_blocks(struct super_block *sb,
                goto exit_journal;
        }
 
-       mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
+       mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
                        bh->b_data);
-       ext4_journal_dirty_metadata(handle, bh);
+       ext4_handle_dirty_metadata(handle, NULL, bh);
 exit_bh:
        brelse(bh);
 
@@ -419,9 +416,9 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 
        /*
         * If we are not using the primary superblock/GDT copy don't resize,
-        * because the user tools have no way of handling this.  Probably a
-        * bad time to do it anyways.
-        */
+         * because the user tools have no way of handling this.  Probably a
+         * bad time to do it anyways.
+         */
        if (EXT4_SB(sb)->s_sbh->b_blocknr !=
            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
                ext4_warning(sb, __func__,
@@ -469,7 +466,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
                goto exit_dindj;
 
        n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
-                       GFP_KERNEL);
+                       GFP_NOFS);
        if (!n_group_desc) {
                err = -ENOMEM;
                ext4_warning(sb, __func__,
@@ -487,12 +484,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
         * reserved inode, and will become GDT blocks (primary and backup).
         */
        data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
-       ext4_journal_dirty_metadata(handle, dind);
+       ext4_handle_dirty_metadata(handle, NULL, dind);
        brelse(dind);
        inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
        ext4_mark_iloc_dirty(handle, inode, &iloc);
        memset((*primary)->b_data, 0, sb->s_blocksize);
-       ext4_journal_dirty_metadata(handle, *primary);
+       ext4_handle_dirty_metadata(handle, NULL, *primary);
 
        o_group_desc = EXT4_SB(sb)->s_group_desc;
        memcpy(n_group_desc, o_group_desc,
@@ -503,19 +500,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
        kfree(o_group_desc);
 
        le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
-       ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+       ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
 
        return 0;
 
 exit_inode:
-       //ext4_journal_release_buffer(handle, iloc.bh);
+       /* ext4_journal_release_buffer(handle, iloc.bh); */
        brelse(iloc.bh);
 exit_dindj:
-       //ext4_journal_release_buffer(handle, dind);
+       /* ext4_journal_release_buffer(handle, dind); */
 exit_primary:
-       //ext4_journal_release_buffer(handle, *primary);
+       /* ext4_journal_release_buffer(handle, *primary); */
 exit_sbh:
-       //ext4_journal_release_buffer(handle, *primary);
+       /* ext4_journal_release_buffer(handle, *primary); */
 exit_dind:
        brelse(dind);
 exit_bh:
@@ -552,7 +549,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
        int res, i;
        int err;
 
-       primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL);
+       primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
        if (!primary)
                return -ENOMEM;
 
@@ -564,7 +561,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
        }
 
        blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
-       data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count;
+       data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
+                                        EXT4_ADDR_PER_BLOCK(sb));
        end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
 
        /* Get each reserved primary GDT block and verify it holds backups */
@@ -618,7 +616,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
                       primary[i]->b_blocknr, gdbackups,
                       blk + primary[i]->b_blocknr); */
                data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
-               err2 = ext4_journal_dirty_metadata(handle, primary[i]);
+               err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]);
                if (!err)
                        err = err2;
        }
@@ -676,7 +674,8 @@ static void update_backups(struct super_block *sb,
                struct buffer_head *bh;
 
                /* Out of journal space, and can't get more - abort - so sad */
-               if (handle->h_buffer_credits == 0 &&
+               if (ext4_handle_valid(handle) &&
+                   handle->h_buffer_credits == 0 &&
                    ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
                    (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
                        break;
@@ -696,7 +695,7 @@ static void update_backups(struct super_block *sb,
                        memset(bh->b_data + size, 0, rest);
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
-               ext4_journal_dirty_metadata(handle, bh);
+               ext4_handle_dirty_metadata(handle, NULL, bh);
                brelse(bh);
        }
        if ((err2 = ext4_journal_stop(handle)) && !err)
@@ -715,7 +714,7 @@ static void update_backups(struct super_block *sb,
 exit_err:
        if (err) {
                ext4_warning(sb, __func__,
-                            "can't update backup for group %lu (err %d), "
+                            "can't update backup for group %u (err %d), "
                             "forcing fsck on next reboot", group, err);
                sbi->s_mount_state &= ~EXT4_VALID_FS;
                sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
@@ -747,6 +746,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        struct inode *inode = NULL;
        handle_t *handle;
        int gdb_off, gdb_num;
+       int num_grp_locked = 0;
        int err, err2;
 
        gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
@@ -761,19 +761,20 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 
        if (ext4_blocks_count(es) + input->blocks_count <
            ext4_blocks_count(es)) {
-               ext4_warning(sb, __func__, "blocks_count overflow\n");
+               ext4_warning(sb, __func__, "blocks_count overflow");
                return -EINVAL;
        }
 
        if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
            le32_to_cpu(es->s_inodes_count)) {
-               ext4_warning(sb, __func__, "inodes_count overflow\n");
+               ext4_warning(sb, __func__, "inodes_count overflow");
                return -EINVAL;
        }
 
        if (reserved_gdb || gdb_off == 0) {
                if (!EXT4_HAS_COMPAT_FEATURE(sb,
-                                            EXT4_FEATURE_COMPAT_RESIZE_INODE)){
+                                            EXT4_FEATURE_COMPAT_RESIZE_INODE)
+                   || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
                        ext4_warning(sb, __func__,
                                     "No reserved GDT blocks, can't resize");
                        return -EPERM;
@@ -786,6 +787,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
                }
        }
 
+
        if ((err = verify_group_input(sb, input)))
                goto exit_put;
 
@@ -818,12 +820,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
                goto exit_journal;
 
-       /*
-        * We will only either add reserved group blocks to a backup group
-        * or remove reserved blocks for the first group in a new group block.
-        * Doing both would be mean more complex code, and sane people don't
-        * use non-sparse filesystems anymore.  This is already checked above.
-        */
+        /*
+         * We will only either add reserved group blocks to a backup group
+         * or remove reserved blocks for the first group in a new group block.
+         * Doing both would mean more complex code, and sane people don't
+         * use non-sparse filesystems anymore.  This is already checked above.
+         */
        if (gdb_off) {
                primary = sbi->s_group_desc[gdb_num];
                if ((err = ext4_journal_get_write_access(handle, primary)))
@@ -835,36 +837,50 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        } else if ((err = add_new_gdb(handle, inode, input, &primary)))
                goto exit_journal;
 
-       /*
-        * OK, now we've set up the new group.  Time to make it active.
-        *
-        * Current kernels don't lock all allocations via lock_super(),
-        * so we have to be safe wrt. concurrent accesses the group
-        * data.  So we need to be careful to set all of the relevant
-        * group descriptor data etc. *before* we enable the group.
-        *
-        * The key field here is sbi->s_groups_count: as long as
-        * that retains its old value, nobody is going to access the new
-        * group.
-        *
-        * So first we update all the descriptor metadata for the new
-        * group; then we update the total disk blocks count; then we
-        * update the groups count to enable the group; then finally we
-        * update the free space counts so that the system can start
-        * using the new disk blocks.
-        */
-
+        /*
+         * OK, now we've set up the new group.  Time to make it active.
+         *
+         * Current kernels don't lock all allocations via lock_super(),
+         * so we have to be safe wrt. concurrent accesses to the group
+         * data.  So we need to be careful to set all of the relevant
+         * group descriptor data etc. *before* we enable the group.
+         *
+         * The key field here is sbi->s_groups_count: as long as
+         * that retains its old value, nobody is going to access the new
+         * group.
+         *
+         * So first we update all the descriptor metadata for the new
+         * group; then we update the total disk blocks count; then we
+         * update the groups count to enable the group; then finally we
+         * update the free space counts so that the system can start
+         * using the new disk blocks.
+         */
+
+       num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group);
        /* Update group descriptor block for new group */
-       gdp = (struct ext4_group_desc *)primary->b_data + gdb_off;
+       gdp = (struct ext4_group_desc *)((char *)primary->b_data +
+                                        gdb_off * EXT4_DESC_SIZE(sb));
 
+       memset(gdp, 0, EXT4_DESC_SIZE(sb));
        ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
        ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
        ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
-       gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
-       gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+       ext4_free_blks_set(sb, gdp, input->free_blocks_count);
+       ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
+       gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
 
        /*
+        * We can allocate memory for mb_alloc based on the new group
+        * descriptor
+        */
+       err = ext4_mb_add_groupinfo(sb, input->group, gdp);
+       if (err) {
+               ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
+               goto exit_journal;
+       }
+
+       /*
         * Make the new blocks and inodes valid next.  We do this before
         * increasing the group count so that once the group is enabled,
         * all of its blocks and inodes are already valid.
@@ -904,8 +920,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 
        /* Update the global fs size fields */
        sbi->s_groups_count++;
+       ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
 
-       ext4_journal_dirty_metadata(handle, primary);
+       ext4_handle_dirty_metadata(handle, NULL, primary);
 
        /* Update the reserved block counts only once the new group is
         * active. */
@@ -918,7 +935,16 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        percpu_counter_add(&sbi->s_freeinodes_counter,
                           EXT4_INODES_PER_GROUP(sb));
 
-       ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+               ext4_group_t flex_group;
+               flex_group = ext4_flex_group(sbi, input->group);
+               atomic_add(input->free_blocks_count,
+                          &sbi->s_flex_groups[flex_group].free_blocks);
+               atomic_add(EXT4_INODES_PER_GROUP(sb),
+                          &sbi->s_flex_groups[flex_group].free_inodes);
+       }
+
+       ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
        sb->s_dirt = 1;
 
 exit_journal:
@@ -936,7 +962,8 @@ exit_put:
        return err;
 } /* ext4_group_add */
 
-/* Extend the filesystem to the new number of blocks specified.  This entry
+/*
+ * Extend the filesystem to the new number of blocks specified.  This entry
  * point is only used to extend the current filesystem to the end of the last
  * existing group.  It can be accessed via ioctl, or by "remount,resize=<size>"
  * for emergencies (because it has no dependencies on reserved blocks).
@@ -952,10 +979,10 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        ext4_group_t o_groups_count;
        ext4_grpblk_t last;
        ext4_grpblk_t add;
-       struct buffer_head * bh;
+       struct buffer_head *bh;
        handle_t *handle;
        int err;
-       unsigned long freed_blocks;
+       ext4_group_t group;
 
        /* We don't need to worry about locking wrt other resizers just
         * yet: we're going to revalidate es->s_blocks_count after
@@ -975,8 +1002,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                        " too large to resize to %llu blocks safely\n",
                        sb->s_id, n_blocks_count);
                if (sizeof(sector_t) < 8)
-                       ext4_warning(sb, __func__,
-                       "CONFIG_LBD not enabled\n");
+                       ext4_warning(sb, __func__, "CONFIG_LBD not enabled");
                return -EINVAL;
        }
 
@@ -987,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        }
 
        /* Handle the remaining blocks in the last group only. */
-       ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
+       ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
 
        if (last == 0) {
                ext4_warning(sb, __func__,
@@ -1012,7 +1038,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                             o_blocks_count + add, add);
 
        /* See if the device is actually as big as what was requested */
-       bh = sb_bread(sb, o_blocks_count + add -1);
+       bh = sb_bread(sb, o_blocks_count + add - 1);
        if (!bh) {
                ext4_warning(sb, __func__,
                             "can't read last block, resize aborted");
@@ -1049,16 +1075,18 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                goto exit_put;
        }
        ext4_blocks_count_set(es, o_blocks_count + add);
-       ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+       ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
        sb->s_dirt = 1;
        unlock_super(sb);
        ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
                   o_blocks_count + add);
-       ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
+       /* We add the blocks to the bitmap and set the group need init bit */
+       ext4_add_groupblocks(handle, sb, o_blocks_count, add);
        ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
                   o_blocks_count + add);
        if ((err = ext4_journal_stop(handle)))
                goto exit_put;
+
        if (test_opt(sb, DEBUG))
                printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
                       ext4_blocks_count(es));