X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fext4%2Fsuper.c;h=22158ebfae2a50e1ec63febac2561bba880e6fdd;hb=4ba9b9d0ba0a49d91fa6417c7510ee36f48cf957;hp=c730cbc84030e6466522130f01639cde3652f808;hpb=e6b4f8da3a88457148038bc952043e99a7fdba64;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c730cbc..22158eb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -29,12 +29,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include @@ -470,7 +472,7 @@ static void ext4_put_super (struct super_block * sb) dump_orphan_list(sb, sbi); J_ASSERT(list_empty(&sbi->s_orphan)); - invalidate_bdev(sb->s_bdev, 0); + invalidate_bdev(sb->s_bdev); if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { /* * Invalidate the journal device's buffers. We don't want them @@ -478,7 +480,7 @@ static void ext4_put_super (struct super_block * sb) * hotswapped, and it breaks the `ro-after' testing code. */ sync_blockdev(sbi->journal_bdev); - invalidate_bdev(sbi->journal_bdev, 0); + invalidate_bdev(sbi->journal_bdev); ext4_blkdev_remove(sbi); } sb->s_fs_info = NULL; @@ -486,7 +488,7 @@ static void ext4_put_super (struct super_block * sb) return; } -static kmem_cache_t *ext4_inode_cachep; +static struct kmem_cache *ext4_inode_cachep; /* * Called inside transaction, so use GFP_NOFS @@ -510,22 +512,27 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) static void ext4_destroy_inode(struct inode *inode) { + if (!list_empty(&(EXT4_I(inode)->i_orphan))) { + printk("EXT4 Inode %p: orphan list check failed!\n", + EXT4_I(inode)); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, + EXT4_I(inode), sizeof(struct ext4_inode_info), + true); + dump_stack(); + } kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(struct kmem_cache *cachep, void *foo) { struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - INIT_LIST_HEAD(&ei->i_orphan); + INIT_LIST_HEAD(&ei->i_orphan); #ifdef CONFIG_EXT4DEV_FS_XATTR - init_rwsem(&ei->xattr_sem); + init_rwsem(&ei->xattr_sem); #endif - mutex_init(&ei->truncate_mutex); - inode_init_once(&ei->vfs_inode); - } + mutex_init(&ei->truncate_mutex); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) @@ -534,7 +541,7 @@ static int init_inodecache(void) sizeof(struct ext4_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (ext4_inode_cachep == NULL) return -ENOMEM; return 0; @@ -690,7 +697,7 @@ static struct quotactl_ops ext4_qctl_operations = { }; #endif -static struct super_operations ext4_sops = { +static const struct super_operations ext4_sops = { .alloc_inode = ext4_alloc_inode, .destroy_inode = ext4_destroy_inode, .read_inode = ext4_read_inode, @@ -728,7 +735,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota, Opt_extents, + Opt_grpquota, Opt_extents, Opt_noextents, }; static match_table_t tokens = { @@ -779,6 +786,7 @@ static match_table_t tokens = { {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, {Opt_extents, "extents"}, + {Opt_noextents, "noextents"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -1114,6 +1122,9 @@ clear_qf_name: case Opt_extents: set_opt (sbi->s_mount_opt, EXTENTS); break; + case Opt_noextents: + clear_opt (sbi->s_mount_opt, EXTENTS); + break; default: printk (KERN_ERR "EXT4-fs: Unrecognized mount option \"%s\" " @@ -1272,7 +1283,7 @@ static int ext4_check_descriptors (struct super_block * sb) } inode_table = ext4_inode_table(sb, gdp); if (inode_table < first_block || - inode_table + sbi->s_itb_per_group > last_block) + inode_table + sbi->s_itb_per_group - 1 > last_block) { ext4_error (sb, "ext4_check_descriptors", "Inode table for group %d" @@ -1321,6 +1332,12 @@ static void ext4_orphan_cleanup (struct super_block * sb, return; } + if (bdev_read_only(sb->s_bdev)) { + printk(KERN_ERR "EXT4-fs: write access " + "unavailable, skipping orphan cleanup.\n"); + return; + } + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { if (es->s_last_orphan) jbd_debug(1, "Errors on filesystem, " @@ -1462,6 +1479,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) int needs_recovery; __le32 features; __u64 blocks_count; + int err; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) @@ -1512,10 +1530,14 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, GRPID); if (def_mount_opts & EXT4_DEFM_UID16) set_opt(sbi->s_mount_opt, NO_UID32); +#ifdef CONFIG_EXT4DEV_FS_XATTR if (def_mount_opts & EXT4_DEFM_XATTR_USER) set_opt(sbi->s_mount_opt, XATTR_USER); +#endif +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL if (def_mount_opts & EXT4_DEFM_ACL) set_opt(sbi->s_mount_opt, POSIX_ACL); +#endif if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) @@ -1535,6 +1557,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, RESERVATION); + /* + * turn on extents feature by default in ext4 filesystem + * User -o noextents to turn it off + */ + set_opt(sbi->s_mount_opt, EXTENTS); + if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, NULL, 0)) goto failed_mount; @@ -1618,13 +1646,15 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_inode_size = le16_to_cpu(es->s_inode_size); sbi->s_first_ino = le32_to_cpu(es->s_first_ino); if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || - (sbi->s_inode_size & (sbi->s_inode_size - 1)) || + (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { printk (KERN_ERR "EXT4-fs: unsupported inode size: %d\n", sbi->s_inode_size); goto failed_mount; } + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) + sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); } sbi->s_frag_size = EXT4_MIN_FRAG_SIZE << le32_to_cpu(es->s_log_frag_size); @@ -1730,12 +1760,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); - percpu_counter_init(&sbi->s_freeblocks_counter, - ext4_count_free_blocks(sb)); - percpu_counter_init(&sbi->s_freeinodes_counter, - ext4_count_free_inodes(sb)); - percpu_counter_init(&sbi->s_dirs_counter, - ext4_count_dirs(sb)); + err = percpu_counter_init(&sbi->s_freeblocks_counter, + ext4_count_free_blocks(sb)); + if (!err) { + err = percpu_counter_init(&sbi->s_freeinodes_counter, + ext4_count_free_inodes(sb)); + } + if (!err) { + err = percpu_counter_init(&sbi->s_dirs_counter, + ext4_count_dirs(sb)); + } + if (err) { + printk(KERN_ERR "EXT4-fs: insufficient memory\n"); + goto failed_mount3; + } /* per fileystem reservation list head & lock */ spin_lock_init(&sbi->s_rsv_window_lock); @@ -1787,6 +1825,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount3; } + if (ext4_blocks_count(es) > 0xffffffffULL && + !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_64BIT)) { + printk(KERN_ERR "ext4: Failed to set 64-bit journal feature\n"); + goto failed_mount4; + } + /* We have now updated the journal if required, so we can * validate the data journaling mode. */ switch (test_opt(sb, DATA_FLAGS)) { @@ -1841,6 +1886,32 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) } ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); + + /* determine the minimum size of new large inodes, if present */ + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_want_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_want_extra_isize); + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_min_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_min_extra_isize); + } + } + /* Check if enough inode space is available */ + if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > + sbi->s_inode_size) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + printk(KERN_INFO "EXT4-fs: required extra inode space not" + "available.\n"); + } + /* * akpm: core read_super() calls in here with the superblock locked. * That deadlocks, because orphan cleanup needs to lock the superblock @@ -1978,7 +2049,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if (bd_claim(bdev, sb)) { printk(KERN_ERR - "EXT4: failed to claim external journal device.\n"); + "EXT4: failed to claim external journal device.\n"); blkdev_put(bdev); return NULL; } @@ -2143,6 +2214,7 @@ static int ext4_create_journal(struct super_block * sb, unsigned int journal_inum) { journal_t *journal; + int err; if (sb->s_flags & MS_RDONLY) { printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " @@ -2150,13 +2222,15 @@ static int ext4_create_journal(struct super_block * sb, return -EROFS; } - if (!(journal = ext4_get_journal(sb, journal_inum))) + journal = ext4_get_journal(sb, journal_inum); + if (!journal) return -EINVAL; printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", journal_inum); - if (jbd2_journal_create(journal)) { + err = jbd2_journal_create(journal); + if (err) { printk(KERN_ERR "EXT4-fs: error creating journal.\n"); jbd2_journal_destroy(journal); return -EIO; @@ -2207,12 +2281,14 @@ static void ext4_mark_recovery_complete(struct super_block * sb, jbd2_journal_lock_updates(journal); jbd2_journal_flush(journal); + lock_super(sb); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && sb->s_flags & MS_RDONLY) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); sb->s_dirt = 0; ext4_commit_super(sb, es, 1); } + unlock_super(sb); jbd2_journal_unlock_updates(journal); } @@ -2401,7 +2477,13 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) (sbi->s_mount_state & EXT4_VALID_FS)) es->s_state = cpu_to_le16(sbi->s_mount_state); + /* + * We have to unlock super so that we can wait for + * transactions. + */ + unlock_super(sb); ext4_mark_recovery_complete(sb, es); + lock_super(sb); } else { __le32 ret; if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, @@ -2413,6 +2495,22 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) err = -EROFS; goto restore_opts; } + + /* + * If we have an unprocessed orphan list hanging + * around from a previously readonly bdev mount, + * require a full umount/remount for now. + */ + if (es->s_last_orphan) { + printk(KERN_WARNING "EXT4-fs: %s: couldn't " + "remount RDWR because of unprocessed " + "orphan inode list. Please " + "umount/remount instead.\n", + sb->s_id); + err = -EINVAL; + goto restore_opts; + } + /* * Mounting a RDONLY partition read-write, so reread * and store the current valid flag. (It may have @@ -2458,18 +2556,19 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) struct super_block *sb = dentry->d_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - ext4_fsblk_t overhead; - int i; + u64 fsid; - if (test_opt (sb, MINIX_DF)) - overhead = 0; - else { - unsigned long ngroups; - ngroups = EXT4_SB(sb)->s_groups_count; + if (test_opt(sb, MINIX_DF)) { + sbi->s_overhead_last = 0; + } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { + unsigned long ngroups = sbi->s_groups_count, i; + ext4_fsblk_t overhead = 0; smp_rmb(); /* - * Compute the overhead (FS structures) + * Compute the overhead (FS structures). This is constant + * for a given filesystem unless the number of block groups + * changes so we cache the previous value until it does. */ /* @@ -2493,19 +2592,28 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) * Every block group has an inode bitmap, a block * bitmap, and an inode table. */ - overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group)); + overhead += ngroups * (2 + sbi->s_itb_per_group); + sbi->s_overhead_last = overhead; + smp_wmb(); + sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); } buf->f_type = EXT4_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = ext4_blocks_count(es) - overhead; - buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); + buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; + buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); + es->s_free_blocks_count = cpu_to_le32(buf->f_bfree); buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); if (buf->f_bfree < ext4_r_blocks_count(es)) buf->f_bavail = 0; buf->f_files = le32_to_cpu(es->s_inodes_count); - buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); + buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); + es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); buf->f_namelen = EXT4_NAME_LEN; + fsid = le64_to_cpup((void *)es->s_uuid) ^ + le64_to_cpup((void *)es->s_uuid + sizeof(u64)); + buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; + buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; return 0; } @@ -2599,8 +2707,11 @@ static int ext4_release_dquot(struct dquot *dquot) handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + /* Release dquot anyway to avoid endless cycle in dqput() */ + dquot_release(dquot); return PTR_ERR(handle); + } ret = dquot_release(dquot); err = ext4_journal_stop(handle); if (!ret) @@ -2733,6 +2844,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); + if (!handle) { + printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" + " cancelled because transaction is not started.\n", + (unsigned long long)off, (unsigned long long)len); + return -EIO; + } mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ?