X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fext4%2Fsuper.c;h=d83cdcbd9b311b1c2ed334bae93060ec4f302423;hb=84061e07c5fbbbf9dc8aef8fb750fc3a2dfc31f3;hp=91b98b58ccb9e3bb3a68137bd0ca3a38bb1acff5;hpb=88b6edd17c62b7d346d21f4087893ce7d4ef828a;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 91b98b5..d83cdcb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -46,13 +45,10 @@ #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" +#include "mballoc.h" -static int default_mb_history_length = 1000; - -module_param_named(default_mb_history_length, default_mb_history_length, - int, 0644); -MODULE_PARM_DESC(default_mb_history_length, - "Default number of entries saved for mb_history"); +#define CREATE_TRACE_POINTS +#include struct proc_dir_entry *ext4_proc_root; static struct kset *ext4_kset; @@ -72,14 +68,28 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); +static int ext4_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt); +#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static struct file_system_type ext3_fs_type = { + .owner = THIS_MODULE, + .name = "ext3", + .get_sb = ext4_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; +#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) +#else +#define IS_EXT3_SB(sb) (0) +#endif ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) { return le32_to_cpu(bg->bg_block_bitmap_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); } ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, @@ -87,7 +97,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, { return le32_to_cpu(bg->bg_inode_bitmap_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); } ext4_fsblk_t ext4_inode_table(struct super_block *sb, @@ -95,7 +105,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb, { return le32_to_cpu(bg->bg_inode_table_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); } __u32 ext4_free_blks_count(struct super_block *sb, @@ -103,7 +113,7 @@ __u32 ext4_free_blks_count(struct super_block *sb, { return le16_to_cpu(bg->bg_free_blocks_count_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); } __u32 ext4_free_inodes_count(struct super_block *sb, @@ -111,7 +121,7 @@ __u32 ext4_free_inodes_count(struct super_block *sb, { return le16_to_cpu(bg->bg_free_inodes_count_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); } __u32 ext4_used_dirs_count(struct super_block *sb, @@ -119,7 +129,7 @@ __u32 ext4_used_dirs_count(struct super_block *sb, { return le16_to_cpu(bg->bg_used_dirs_count_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); } __u32 ext4_itable_unused_count(struct super_block *sb, @@ -127,7 +137,7 @@ __u32 ext4_itable_unused_count(struct super_block *sb, { return le16_to_cpu(bg->bg_itable_unused_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); } void ext4_block_bitmap_set(struct super_block *sb, @@ -186,6 +196,36 @@ void ext4_itable_unused_set(struct super_block *sb, bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); } + +/* Just increment the non-pointer handle value */ +static handle_t *ext4_get_nojournal(void) +{ + handle_t *handle = current->journal_info; + unsigned long ref_cnt = (unsigned long)handle; + + BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); + + ref_cnt++; + handle = (handle_t *)ref_cnt; + + current->journal_info = handle; + return handle; +} + + +/* Decrement the non-pointer handle value */ +static void ext4_put_nojournal(handle_t *handle) +{ + unsigned long ref_cnt = (unsigned long)handle; + + BUG_ON(ref_cnt == 0); + + ref_cnt--; + handle = (handle_t *)ref_cnt; + + current->journal_info = handle; +} + /* * Wrappers for jbd2_journal_start/end. * @@ -201,23 +241,19 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) if (sb->s_flags & MS_RDONLY) return ERR_PTR(-EROFS); + vfs_check_frozen(sb, SB_FREEZE_WRITE); /* Special case here: if the journal has aborted behind our * backs (eg. EIO in the commit thread), then we still need to * take the FS itself readonly cleanly. */ journal = EXT4_SB(sb)->s_journal; if (journal) { if (is_journal_aborted(journal)) { - ext4_abort(sb, __func__, - "Detected aborted journal"); + ext4_abort(sb, __func__, "Detected aborted journal"); return ERR_PTR(-EROFS); } return jbd2_journal_start(journal, nblocks); } - /* - * We're not journaling, return the appropriate indication. - */ - current->journal_info = EXT4_NOJOURNAL_HANDLE; - return current->journal_info; + return ext4_get_nojournal(); } /* @@ -233,11 +269,7 @@ int __ext4_journal_stop(const char *where, handle_t *handle) int rc; if (!ext4_handle_valid(handle)) { - /* - * Do this here since we don't call jbd2_journal_stop() in - * no-journal mode. - */ - current->journal_info = NULL; + ext4_put_nojournal(handle); return 0; } sb = handle->h_transaction->t_journal->j_private; @@ -285,7 +317,7 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn, * write out the superblock safely. * * We'll just use the jbd2_journal_abort() error code to record an error in - * the journal instead. On recovery, the journal will compain about + * the journal instead. On recovery, the journal will complain about * that error until we've noted it down and cleared it. */ @@ -302,12 +334,12 @@ static void ext4_handle_error(struct super_block *sb) if (!test_opt(sb, ERRORS_CONT)) { journal_t *journal = EXT4_SB(sb)->s_journal; - EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; + EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; if (journal) jbd2_journal_abort(journal, -EIO); } if (test_opt(sb, ERRORS_RO)) { - printk(KERN_CRIT "Remounting filesystem read-only\n"); + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); sb->s_flags |= MS_RDONLY; } ext4_commit_super(sb, 1); @@ -316,7 +348,7 @@ static void ext4_handle_error(struct super_block *sb) sb->s_id); } -void ext4_error(struct super_block *sb, const char *function, +void __ext4_error(struct super_block *sb, const char *function, const char *fmt, ...) { va_list args; @@ -330,6 +362,42 @@ void ext4_error(struct super_block *sb, const char *function, ext4_handle_error(sb); } +void ext4_error_inode(const char *function, struct inode *inode, + const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", + inode->i_sb->s_id, function, inode->i_ino, current->comm); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + ext4_handle_error(inode->i_sb); +} + +void ext4_error_file(const char *function, struct file *file, + const char *fmt, ...) +{ + va_list args; + struct inode *inode = file->f_dentry->d_inode; + char pathname[80], *path; + + va_start(args, fmt); + path = d_path(&(file->f_path), pathname, sizeof(pathname)); + if (!path) + path = "(unknown)"; + printk(KERN_CRIT + "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", + inode->i_sb->s_id, function, inode->i_ino, current->comm, path); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + ext4_handle_error(inode->i_sb); +} + static const char *ext4_decode_error(struct super_block *sb, int errno, char nbuf[16]) { @@ -343,7 +411,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, errstr = "Out of memory"; break; case -EROFS: - if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) + if (!sb || (EXT4_SB(sb)->s_journal && + EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) errstr = "Journal has aborted"; else errstr = "Readonly filesystem"; @@ -400,8 +469,6 @@ void ext4_abort(struct super_block *sb, const char *function, { va_list args; - printk(KERN_CRIT "ext4_abort called.\n"); - va_start(args, fmt); printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); vprintk(fmt, args); @@ -414,15 +481,27 @@ void ext4_abort(struct super_block *sb, const char *function, if (sb->s_flags & MS_RDONLY) return; - printk(KERN_CRIT "Remounting filesystem read-only\n"); + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; sb->s_flags |= MS_RDONLY; - EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; + EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; if (EXT4_SB(sb)->s_journal) jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); } -void ext4_warning(struct super_block *sb, const char *function, +void ext4_msg (struct super_block * sb, const char *prefix, + const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk("%sEXT4-fs (%s): ", prefix, sb->s_id); + vprintk(fmt, args); + printk("\n"); + va_end(args); +} + +void __ext4_warning(struct super_block *sb, const char *function, const char *fmt, ...) { va_list args; @@ -436,7 +515,7 @@ void ext4_warning(struct super_block *sb, const char *function, } void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, - const char *function, const char *fmt, ...) + const char *function, const char *fmt, ...) __releases(bitlock) __acquires(bitlock) { @@ -472,7 +551,6 @@ __acquires(bitlock) return; } - void ext4_update_dynamic_rev(struct super_block *sb) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; @@ -480,7 +558,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) return; - ext4_warning(sb, __func__, + ext4_warning(sb, "updating to rev %d because of new feature flag, " "running e2fsck is recommended", EXT4_DYNAMIC_REV); @@ -501,7 +579,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) /* * Open the external journal device */ -static struct block_device *ext4_blkdev_get(dev_t dev) +static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) { struct block_device *bdev; char b[BDEVNAME_SIZE]; @@ -512,7 +590,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev) return bdev; fail: - printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n", + ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", __bdevname(dev, b), PTR_ERR(bdev)); return NULL; } @@ -548,8 +626,8 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) { struct list_head *l; - printk(KERN_ERR "sb orphan head is %d\n", - le32_to_cpu(sbi->s_es->s_last_orphan)); + ext4_msg(sb, KERN_ERR, "sb orphan head is %d", + le32_to_cpu(sbi->s_es->s_last_orphan)); printk(KERN_ERR "sb_info orphan list:\n"); list_for_each(l, &sbi->s_orphan) { @@ -568,10 +646,14 @@ static void ext4_put_super(struct super_block *sb) struct ext4_super_block *es = sbi->s_es; int i, err; - ext4_release_system_zone(sb); - ext4_mb_release(sb); - ext4_ext_release(sb); - ext4_xattr_put_super(sb); + flush_workqueue(sbi->dio_unwritten_wq); + destroy_workqueue(sbi->dio_unwritten_wq); + + lock_super(sb); + lock_kernel(); + if (sb->s_dirt) + ext4_commit_super(sb, 1); + if (sbi->s_journal) { err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; @@ -579,6 +661,12 @@ static void ext4_put_super(struct super_block *sb) ext4_abort(sb, __func__, "Couldn't clean up the journal"); } + + ext4_release_system_zone(sb); + ext4_mb_release(sb); + ext4_ext_release(sb); + ext4_xattr_put_super(sb); + if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); @@ -634,11 +722,8 @@ static void ext4_put_super(struct super_block *sb) unlock_super(sb); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); - lock_super(sb); - lock_kernel(); kfree(sbi->s_blockgroup_lock); kfree(sbi); - return; } static struct kmem_cache *ext4_inode_cachep; @@ -653,10 +738,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL; -#ifdef CONFIG_EXT4_FS_POSIX_ACL - ei->i_acl = EXT4_ACL_NOT_CACHED; - ei->i_default_acl = EXT4_ACL_NOT_CACHED; -#endif + ei->vfs_inode.i_version = 1; ei->vfs_inode.i_data.writeback_index = 0; memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); @@ -671,16 +753,27 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_reserved_data_blocks = 0; ei->i_reserved_meta_blocks = 0; ei->i_allocated_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; ei->i_delalloc_reserved_flag = 0; spin_lock_init(&(ei->i_block_reservation_lock)); +#ifdef CONFIG_QUOTA + ei->i_reserved_quota = 0; +#endif + INIT_LIST_HEAD(&ei->i_completed_io_list); + spin_lock_init(&ei->i_completed_io_lock); + ei->cur_aio_dio = NULL; + ei->i_sync_tid = 0; + ei->i_datasync_tid = 0; + return &ei->vfs_inode; } static void ext4_destroy_inode(struct inode *inode) { if (!list_empty(&(EXT4_I(inode)->i_orphan))) { - printk("EXT4 Inode %p: orphan list check failed!\n", - EXT4_I(inode)); + ext4_msg(inode->i_sb, KERN_ERR, + "Inode %lu (%p): orphan list check failed!", + inode->i_ino, EXT4_I(inode)); print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, EXT4_I(inode), sizeof(struct ext4_inode_info), true); @@ -720,18 +813,7 @@ static void destroy_inodecache(void) static void ext4_clear_inode(struct inode *inode) { -#ifdef CONFIG_EXT4_FS_POSIX_ACL - if (EXT4_I(inode)->i_acl && - EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { - posix_acl_release(EXT4_I(inode)->i_acl); - EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; - } - if (EXT4_I(inode)->i_default_acl && - EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { - posix_acl_release(EXT4_I(inode)->i_default_acl); - EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; - } -#endif + dquot_drop(inode); ext4_discard_preallocations(inode); if (EXT4_JOURNAL(inode)) jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, @@ -744,9 +826,22 @@ static inline void ext4_show_quota_options(struct seq_file *seq, #if defined(CONFIG_QUOTA) struct ext4_sb_info *sbi = EXT4_SB(sb); - if (sbi->s_jquota_fmt) - seq_printf(seq, ",jqfmt=%s", - (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); + if (sbi->s_jquota_fmt) { + char *fmtname = ""; + + switch (sbi->s_jquota_fmt) { + case QFMT_VFS_OLD: + fmtname = "vfsold"; + break; + case QFMT_VFS_V0: + fmtname = "vfsv0"; + break; + case QFMT_VFS_V1: + fmtname = "vfsv1"; + break; + } + seq_printf(seq, ",jqfmt=%s", fmtname); + } if (sbi->s_qf_names[USRQUOTA]) seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); @@ -754,10 +849,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq, if (sbi->s_qf_names[GRPQUOTA]) seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) + if (test_opt(sb, USRQUOTA)) seq_puts(seq, ",usrquota"); - if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) + if (test_opt(sb, GRPQUOTA)) seq_puts(seq, ",grpquota"); #endif } @@ -878,13 +973,22 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) if (test_opt(sb, NO_AUTO_DA_ALLOC)) seq_puts(seq, ",noauto_da_alloc"); + if (test_opt(sb, DISCARD)) + seq_puts(seq, ",discard"); + + if (test_opt(sb, NOLOAD)) + seq_puts(seq, ",norecovery"); + + if (test_opt(sb, DIOREAD_NOLOCK)) + seq_puts(seq, ",dioread_nolock"); + ext4_show_quota_options(seq, sb); + return 0; } - static struct inode *ext4_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) + u64 ino, u32 generation) { struct inode *inode; @@ -913,14 +1017,14 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, } static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) + int fh_len, int fh_type) { return generic_fh_to_dentry(sb, fid, fh_len, fh_type, ext4_nfs_get_inode); } static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) + int fh_len, int fh_type) { return generic_fh_to_parent(sb, fid, fh_len, fh_type, ext4_nfs_get_inode); @@ -932,7 +1036,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, * which would prevent try_to_free_buffers() from freeing them, we must use * jbd2 layer's try_to_free_buffers() function to release them. */ -static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait) +static int bdev_try_to_free_page(struct super_block *sb, struct page *page, + gfp_t wait) { journal_t *journal = EXT4_SB(sb)->s_journal; @@ -962,18 +1067,10 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, static ssize_t ext4_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); -static struct dquot_operations ext4_quota_operations = { - .initialize = dquot_initialize, - .drop = dquot_drop, - .alloc_space = dquot_alloc_space, - .reserve_space = dquot_reserve_space, - .claim_space = dquot_claim_space, - .release_rsv = dquot_release_reserved_space, +static const struct dquot_operations ext4_quota_operations = { +#ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, - .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, - .free_inode = dquot_free_inode, - .transfer = dquot_transfer, +#endif .write_dquot = ext4_write_dquot, .acquire_dquot = ext4_acquire_dquot, .release_dquot = ext4_release_dquot, @@ -983,7 +1080,7 @@ static struct dquot_operations ext4_quota_operations = { .destroy_dquot = dquot_destroy, }; -static struct quotactl_ops ext4_qctl_operations = { +static const struct quotactl_ops ext4_qctl_operations = { .quota_on = ext4_quota_on, .quota_off = vfs_quota_off, .quota_sync = vfs_quota_sync, @@ -1050,14 +1147,16 @@ enum { Opt_journal_update, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, + Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, - Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, - Opt_usrquota, Opt_grpquota, Opt_i_version, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, + Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, + Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_block_validity, Opt_noblock_validity, - Opt_inode_readahead_blks, Opt_journal_ioprio + Opt_inode_readahead_blks, Opt_journal_ioprio, + Opt_dioread_nolock, Opt_dioread_lock, + Opt_discard, Opt_nodiscard, }; static const match_table_t tokens = { @@ -1082,6 +1181,7 @@ static const match_table_t tokens = { {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_noload, "noload"}, + {Opt_noload, "norecovery"}, {Opt_nobh, "nobh"}, {Opt_bh, "bh"}, {Opt_commit, "commit=%u"}, @@ -1097,13 +1197,13 @@ static const match_table_t tokens = { {Opt_data_writeback, "data=writeback"}, {Opt_data_err_abort, "data_err=abort"}, {Opt_data_err_ignore, "data_err=ignore"}, - {Opt_mb_history_length, "mb_history_length=%u"}, {Opt_offusrjquota, "usrjquota="}, {Opt_usrjquota, "usrjquota=%s"}, {Opt_offgrpjquota, "grpjquota="}, {Opt_grpjquota, "grpjquota=%s"}, {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, + {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, {Opt_grpquota, "grpquota"}, {Opt_noquota, "noquota"}, {Opt_quota, "quota"}, @@ -1123,6 +1223,10 @@ static const match_table_t tokens = { {Opt_auto_da_alloc, "auto_da_alloc=%u"}, {Opt_auto_da_alloc, "auto_da_alloc"}, {Opt_noauto_da_alloc, "noauto_da_alloc"}, + {Opt_dioread_nolock, "dioread_nolock"}, + {Opt_dioread_lock, "dioread_lock"}, + {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, {Opt_err, NULL}, }; @@ -1133,8 +1237,9 @@ static ext4_fsblk_t get_sb_block(void **data) if (!options || strncmp(options, "sb=", 3) != 0) return 1; /* Default location */ + options += 3; - /*todo: use simple_strtoll with >32bit ext4 */ + /* TODO: use simple_strtoll with >32bit ext4 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", @@ -1144,10 +1249,71 @@ static ext4_fsblk_t get_sb_block(void **data) if (*options == ',') options++; *data = (void *) options; + return sb_block; } #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) +static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" + "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; + +#ifdef CONFIG_QUOTA +static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + char *qname; + + if (sb_any_quota_loaded(sb) && + !sbi->s_qf_names[qtype]) { + ext4_msg(sb, KERN_ERR, + "Cannot change journaled " + "quota options when quota turned on"); + return 0; + } + qname = match_strdup(args); + if (!qname) { + ext4_msg(sb, KERN_ERR, + "Not enough memory for storing quotafile name"); + return 0; + } + if (sbi->s_qf_names[qtype] && + strcmp(sbi->s_qf_names[qtype], qname)) { + ext4_msg(sb, KERN_ERR, + "%s quota file already specified", QTYPE2NAME(qtype)); + kfree(qname); + return 0; + } + sbi->s_qf_names[qtype] = qname; + if (strchr(sbi->s_qf_names[qtype], '/')) { + ext4_msg(sb, KERN_ERR, + "quotafile must be on filesystem root"); + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + return 0; + } + set_opt(sbi->s_mount_opt, QUOTA); + return 1; +} + +static int clear_qf_name(struct super_block *sb, int qtype) +{ + + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (sb_any_quota_loaded(sb) && + sbi->s_qf_names[qtype]) { + ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" + " when quota turned on"); + return 0; + } + /* + * The space will be released later when all options are confirmed + * to be correct + */ + sbi->s_qf_names[qtype] = NULL; + return 1; +} +#endif static int parse_options(char *options, struct super_block *sb, unsigned long *journal_devnum, @@ -1160,8 +1326,7 @@ static int parse_options(char *options, struct super_block *sb, int data_opt = 0; int option; #ifdef CONFIG_QUOTA - int qtype, qfmt; - char *qname; + int qfmt; #endif if (!options) @@ -1172,19 +1337,31 @@ static int parse_options(char *options, struct super_block *sb, if (!*p) continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].to = args[0].from = 0; token = match_token(p, tokens, args); switch (token) { case Opt_bsd_df: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); clear_opt(sbi->s_mount_opt, MINIX_DF); break; case Opt_minix_df: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); set_opt(sbi->s_mount_opt, MINIX_DF); + break; case Opt_grpid: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); set_opt(sbi->s_mount_opt, GRPID); + break; case Opt_nogrpid: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); clear_opt(sbi->s_mount_opt, GRPID); + break; case Opt_resuid: if (match_int(&args[0], &option)) @@ -1237,8 +1414,7 @@ static int parse_options(char *options, struct super_block *sb, #else case Opt_user_xattr: case Opt_nouser_xattr: - printk(KERN_ERR "EXT4 (no)user_xattr options " - "not supported\n"); + ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); break; #endif #ifdef CONFIG_EXT4_FS_POSIX_ACL @@ -1251,8 +1427,7 @@ static int parse_options(char *options, struct super_block *sb, #else case Opt_acl: case Opt_noacl: - printk(KERN_ERR "EXT4 (no)acl options " - "not supported\n"); + ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); break; #endif case Opt_journal_update: @@ -1262,16 +1437,16 @@ static int parse_options(char *options, struct super_block *sb, user to specify an existing inode to be the journal file. */ if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); + ext4_msg(sb, KERN_ERR, + "Cannot specify journal on remount"); return 0; } set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); break; case Opt_journal_dev: if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); + ext4_msg(sb, KERN_ERR, + "Cannot specify journal on remount"); return 0; } if (match_int(&args[0], &option)) @@ -1323,15 +1498,13 @@ static int parse_options(char *options, struct super_block *sb, data_opt = EXT4_MOUNT_WRITEBACK_DATA; datacheck: if (is_remount) { - if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) - != data_opt) { - printk(KERN_ERR - "EXT4-fs: cannot change data " - "mode on remount\n"); + if (test_opt(sb, DATA_FLAGS) != data_opt) { + ext4_msg(sb, KERN_ERR, + "Cannot change data mode on remount"); return 0; } } else { - sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; + clear_opt(sbi->s_mount_opt, DATA_FLAGS); sbi->s_mount_opt |= data_opt; } break; @@ -1341,83 +1514,38 @@ static int parse_options(char *options, struct super_block *sb, case Opt_data_err_ignore: clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); break; - case Opt_mb_history_length: - if (match_int(&args[0], &option)) - return 0; - if (option < 0) - return 0; - sbi->s_mb_history_max = option; - break; #ifdef CONFIG_QUOTA case Opt_usrjquota: - qtype = USRQUOTA; - goto set_qf_name; - case Opt_grpjquota: - qtype = GRPQUOTA; -set_qf_name: - if (sb_any_quota_loaded(sb) && - !sbi->s_qf_names[qtype]) { - printk(KERN_ERR - "EXT4-fs: Cannot change journaled " - "quota options when quota turned on.\n"); - return 0; - } - qname = match_strdup(&args[0]); - if (!qname) { - printk(KERN_ERR - "EXT4-fs: not enough memory for " - "storing quotafile name.\n"); - return 0; - } - if (sbi->s_qf_names[qtype] && - strcmp(sbi->s_qf_names[qtype], qname)) { - printk(KERN_ERR - "EXT4-fs: %s quota file already " - "specified.\n", QTYPE2NAME(qtype)); - kfree(qname); + if (!set_qf_name(sb, USRQUOTA, &args[0])) return 0; - } - sbi->s_qf_names[qtype] = qname; - if (strchr(sbi->s_qf_names[qtype], '/')) { - printk(KERN_ERR - "EXT4-fs: quotafile must be on " - "filesystem root.\n"); - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + break; + case Opt_grpjquota: + if (!set_qf_name(sb, GRPQUOTA, &args[0])) return 0; - } - set_opt(sbi->s_mount_opt, QUOTA); break; case Opt_offusrjquota: - qtype = USRQUOTA; - goto clear_qf_name; + if (!clear_qf_name(sb, USRQUOTA)) + return 0; + break; case Opt_offgrpjquota: - qtype = GRPQUOTA; -clear_qf_name: - if (sb_any_quota_loaded(sb) && - sbi->s_qf_names[qtype]) { - printk(KERN_ERR "EXT4-fs: Cannot change " - "journaled quota options when " - "quota turned on.\n"); + if (!clear_qf_name(sb, GRPQUOTA)) return 0; - } - /* - * The space will be released later when all options - * are confirmed to be correct - */ - sbi->s_qf_names[qtype] = NULL; break; + case Opt_jqfmt_vfsold: qfmt = QFMT_VFS_OLD; goto set_qf_format; case Opt_jqfmt_vfsv0: qfmt = QFMT_VFS_V0; + goto set_qf_format; + case Opt_jqfmt_vfsv1: + qfmt = QFMT_VFS_V1; set_qf_format: if (sb_any_quota_loaded(sb) && sbi->s_jquota_fmt != qfmt) { - printk(KERN_ERR "EXT4-fs: Cannot change " + ext4_msg(sb, KERN_ERR, "Cannot change " "journaled quota options when " - "quota turned on.\n"); + "quota turned on"); return 0; } sbi->s_jquota_fmt = qfmt; @@ -1433,8 +1561,8 @@ set_qf_format: break; case Opt_noquota: if (sb_any_quota_loaded(sb)) { - printk(KERN_ERR "EXT4-fs: Cannot change quota " - "options when quota turned on.\n"); + ext4_msg(sb, KERN_ERR, "Cannot change quota " + "options when quota turned on"); return 0; } clear_opt(sbi->s_mount_opt, QUOTA); @@ -1445,8 +1573,8 @@ set_qf_format: case Opt_quota: case Opt_usrquota: case Opt_grpquota: - printk(KERN_ERR - "EXT4-fs: quota options not supported.\n"); + ext4_msg(sb, KERN_ERR, + "quota options not supported"); break; case Opt_usrjquota: case Opt_grpjquota: @@ -1454,24 +1582,25 @@ set_qf_format: case Opt_offgrpjquota: case Opt_jqfmt_vfsold: case Opt_jqfmt_vfsv0: - printk(KERN_ERR - "EXT4-fs: journaled quota options not " - "supported.\n"); + case Opt_jqfmt_vfsv1: + ext4_msg(sb, KERN_ERR, + "journaled quota options not supported"); break; case Opt_noquota: break; #endif case Opt_abort: - set_opt(sbi->s_mount_opt, ABORT); + sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; break; case Opt_nobarrier: clear_opt(sbi->s_mount_opt, BARRIER); break; case Opt_barrier: - if (match_int(&args[0], &option)) { - set_opt(sbi->s_mount_opt, BARRIER); - break; - } + if (args[0].from) { + if (match_int(&args[0], &option)) + return 0; + } else + option = 1; /* No argument, default to 1 */ if (option) set_opt(sbi->s_mount_opt, BARRIER); else @@ -1481,8 +1610,9 @@ set_qf_format: break; case Opt_resize: if (!is_remount) { - printk("EXT4-fs: resize option only available " - "for remount\n"); + ext4_msg(sb, KERN_ERR, + "resize option only available " + "for remount"); return 0; } if (match_int(&args[0], &option) != 0) @@ -1524,8 +1654,9 @@ set_qf_format: if (option < 0 || option > (1 << 30)) return 0; if (!is_power_of_2(option)) { - printk(KERN_ERR "EXT4-fs: inode_readahead_blks" - " must be a power of 2\n"); + ext4_msg(sb, KERN_ERR, + "EXT4-fs: inode_readahead_blks" + " must be a power of 2"); return 0; } sbi->s_inode_readahead_blks = option; @@ -1542,51 +1673,59 @@ set_qf_format: set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); break; case Opt_auto_da_alloc: - if (match_int(&args[0], &option)) { - clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); - break; - } + if (args[0].from) { + if (match_int(&args[0], &option)) + return 0; + } else + option = 1; /* No argument, default to 1 */ if (option) clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); else set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); break; + case Opt_discard: + set_opt(sbi->s_mount_opt, DISCARD); + break; + case Opt_nodiscard: + clear_opt(sbi->s_mount_opt, DISCARD); + break; + case Opt_dioread_nolock: + set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + break; + case Opt_dioread_lock: + clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + break; default: - printk(KERN_ERR - "EXT4-fs: Unrecognized mount option \"%s\" " - "or missing value\n", p); + ext4_msg(sb, KERN_ERR, + "Unrecognized mount option \"%s\" " + "or missing value", p); return 0; } } #ifdef CONFIG_QUOTA if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { - if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && - sbi->s_qf_names[USRQUOTA]) + if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sbi->s_mount_opt, USRQUOTA); - if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && - sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) clear_opt(sbi->s_mount_opt, GRPQUOTA); - if ((sbi->s_qf_names[USRQUOTA] && - (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || - (sbi->s_qf_names[GRPQUOTA] && - (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { - printk(KERN_ERR "EXT4-fs: old and new quota " - "format mixing.\n"); + if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { + ext4_msg(sb, KERN_ERR, "old and new quota " + "format mixing"); return 0; } if (!sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journaled quota format " - "not specified.\n"); + ext4_msg(sb, KERN_ERR, "journaled quota format " + "not specified"); return 0; } } else { if (sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journaled quota format " + ext4_msg(sb, KERN_ERR, "journaled quota format " "specified with no journaling " - "enabled.\n"); + "enabled"); return 0; } } @@ -1601,32 +1740,32 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int res = 0; if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { - printk(KERN_ERR "EXT4-fs warning: revision level too high, " - "forcing read-only mode\n"); + ext4_msg(sb, KERN_ERR, "revision level too high, " + "forcing read-only mode"); res = MS_RDONLY; } if (read_only) return res; if (!(sbi->s_mount_state & EXT4_VALID_FS)) - printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " + "running e2fsck is recommended"); else if ((sbi->s_mount_state & EXT4_ERROR_FS)) - printk(KERN_WARNING - "EXT4-fs warning: mounting fs with errors, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, + "warning: mounting fs with errors, " + "running e2fsck is recommended"); else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && le16_to_cpu(es->s_mnt_count) >= (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) - printk(KERN_WARNING - "EXT4-fs warning: maximal mount count reached, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, + "warning: maximal mount count reached, " + "running e2fsck is recommended"); else if (le32_to_cpu(es->s_checkinterval) && (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) - printk(KERN_WARNING - "EXT4-fs warning: checktime reached, " - "running e2fsck is recommended\n"); - if (!sbi->s_journal) + ext4_msg(sb, KERN_WARNING, + "warning: checktime reached, " + "running e2fsck is recommended"); + if (!sbi->s_journal) es->s_state &= cpu_to_le16(~EXT4_VALID_FS); if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); @@ -1639,20 +1778,13 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, ext4_commit_super(sb, 1); if (test_opt(sb, DEBUG)) printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " - "bpg=%lu, ipg=%lu, mo=%04lx]\n", + "bpg=%lu, ipg=%lu, mo=%04x]\n", sb->s_blocksize, sbi->s_groups_count, EXT4_BLOCKS_PER_GROUP(sb), EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt); - if (EXT4_SB(sb)->s_journal) { - printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", - sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : - "external", EXT4_SB(sb)->s_journal->j_devname); - } else { - printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id); - } return res; } @@ -1666,14 +1798,14 @@ static int ext4_fill_flex_info(struct super_block *sb) size_t size; int i; - if (!sbi->s_es->s_log_groups_per_flex) { + sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; + groups_per_flex = 1 << sbi->s_log_groups_per_flex; + + if (groups_per_flex < 2) { sbi->s_log_groups_per_flex = 0; return 1; } - sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; - groups_per_flex = 1 << sbi->s_log_groups_per_flex; - /* We allocate both existing and potentially added groups */ flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << @@ -1686,8 +1818,8 @@ static int ext4_fill_flex_info(struct super_block *sb) memset(sbi->s_flex_groups, 0, size); } if (sbi->s_flex_groups == NULL) { - printk(KERN_ERR "EXT4-fs: not enough memory for " - "%u flex groups\n", flex_group_count); + ext4_msg(sb, KERN_ERR, "not enough memory for " + "%u flex groups", flex_group_count); goto failed; } @@ -1695,12 +1827,12 @@ static int ext4_fill_flex_info(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); - atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, - ext4_free_inodes_count(sb, gdp)); - atomic_set(&sbi->s_flex_groups[flex_group].free_blocks, - ext4_free_blks_count(sb, gdp)); - atomic_set(&sbi->s_flex_groups[flex_group].used_dirs, - ext4_used_dirs_count(sb, gdp)); + atomic_add(ext4_free_inodes_count(sb, gdp), + &sbi->s_flex_groups[flex_group].free_inodes); + atomic_add(ext4_free_blks_count(sb, gdp), + &sbi->s_flex_groups[flex_group].free_blocks); + atomic_add(ext4_used_dirs_count(sb, gdp), + &sbi->s_flex_groups[flex_group].used_dirs); } return 1; @@ -1773,32 +1905,32 @@ static int ext4_check_descriptors(struct super_block *sb) block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u not in group " - "(block %llu)!\n", i, block_bitmap); + "(block %llu)!", i, block_bitmap); return 0; } inode_bitmap = ext4_inode_bitmap(sb, gdp); if (inode_bitmap < first_block || inode_bitmap > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u not in group " - "(block %llu)!\n", i, inode_bitmap); + "(block %llu)!", i, inode_bitmap); return 0; } inode_table = ext4_inode_table(sb, gdp); if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode table for group %u not in group " - "(block %llu)!\n", i, inode_table); + "(block %llu)!", i, inode_table); return 0; } ext4_lock_group(sb, i); if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Checksum for group %u failed (%u!=%u)\n", - i, le16_to_cpu(ext4_group_desc_csum(sbi, i, - gdp)), le16_to_cpu(gdp->bg_checksum)); + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Checksum for group %u failed (%u!=%u)", + i, le16_to_cpu(ext4_group_desc_csum(sbi, i, + gdp)), le16_to_cpu(gdp->bg_checksum)); if (!(sb->s_flags & MS_RDONLY)) { ext4_unlock_group(sb, i); return 0; @@ -1810,7 +1942,7 @@ static int ext4_check_descriptors(struct super_block *sb) } ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); - sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); + sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); return 1; } @@ -1845,8 +1977,8 @@ static void ext4_orphan_cleanup(struct super_block *sb, } if (bdev_read_only(sb->s_bdev)) { - printk(KERN_ERR "EXT4-fs: write access " - "unavailable, skipping orphan cleanup.\n"); + ext4_msg(sb, KERN_ERR, "write access " + "unavailable, skipping orphan cleanup"); return; } @@ -1860,8 +1992,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, } if (s_flags & MS_RDONLY) { - printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", - sb->s_id); + ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); sb->s_flags &= ~MS_RDONLY; } #ifdef CONFIG_QUOTA @@ -1872,9 +2003,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, if (EXT4_SB(sb)->s_qf_names[i]) { int ret = ext4_quota_on_mount(sb, i); if (ret < 0) - printk(KERN_ERR - "EXT4-fs: Cannot turn on journaled " - "quota: error %d\n", ret); + ext4_msg(sb, KERN_ERR, + "Cannot turn on journaled " + "quota: error %d", ret); } } #endif @@ -1889,18 +2020,18 @@ static void ext4_orphan_cleanup(struct super_block *sb, } list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); - vfs_dq_init(inode); + dquot_initialize(inode); if (inode->i_nlink) { - printk(KERN_DEBUG - "%s: truncating inode %lu to %lld bytes\n", + ext4_msg(sb, KERN_DEBUG, + "%s: truncating inode %lu to %lld bytes", __func__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %lld bytes\n", inode->i_ino, inode->i_size); ext4_truncate(inode); nr_truncates++; } else { - printk(KERN_DEBUG - "%s: deleting unreferenced inode %lu\n", + ext4_msg(sb, KERN_DEBUG, + "%s: deleting unreferenced inode %lu", __func__, inode->i_ino); jbd_debug(2, "deleting unreferenced inode %lu\n", inode->i_ino); @@ -1912,11 +2043,11 @@ static void ext4_orphan_cleanup(struct super_block *sb, #define PLURAL(x) (x), ((x) == 1) ? "" : "s" if (nr_orphans) - printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", - sb->s_id, PLURAL(nr_orphans)); + ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", + PLURAL(nr_orphans)); if (nr_truncates) - printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", - sb->s_id, PLURAL(nr_truncates)); + ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", + PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA /* Turn quotas off */ for (i = 0; i < MAXQUOTAS; i++) { @@ -1926,6 +2057,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ } + /* * Maximal extent format file size. * Resulting logical blkno at s_maxbytes must fit in our on-disk @@ -1943,7 +2075,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) /* small i_blocks in vfs inode? */ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { /* - * CONFIG_LBD is not enabled implies the inode + * CONFIG_LBDAF is not enabled implies the inode * i_block represent total blocks in 512 bytes * 32 == size of vfs inode i_blocks * 8 */ @@ -1976,19 +2108,19 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) loff_t res = EXT4_NDIR_BLOCKS; int meta_blocks; loff_t upper_limit; - /* This is calculated to be the largest file size for a - * dense, bitmapped file such that the total number of - * sectors in the file, including data and all indirect blocks, - * does not exceed 2^48 -1 - * __u32 i_blocks_lo and _u16 i_blocks_high representing the - * total number of 512 bytes blocks of the file + /* This is calculated to be the largest file size for a dense, block + * mapped file such that the file's total number of 512-byte sectors, + * including data and all indirect blocks, does not exceed (2^48 - 1). + * + * __u32 i_blocks_lo and _u16 i_blocks_high represent the total + * number of 512-byte sectors of the file. */ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { /* - * !has_huge_files or CONFIG_LBD is not enabled - * implies the inode i_block represent total blocks in - * 512 bytes 32 == size of vfs inode i_blocks * 8 + * !has_huge_files or CONFIG_LBDAF not enabled implies that + * the inode i_block field represents total file blocks in + * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 */ upper_limit = (1LL << 32) - 1; @@ -2030,7 +2162,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) } static ext4_fsblk_t descriptor_loc(struct super_block *sb, - ext4_fsblk_t logical_sb_block, int nr) + ext4_fsblk_t logical_sb_block, int nr) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t bg, first_meta_bg; @@ -2044,6 +2176,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, bg = sbi->s_desc_per_block * nr; if (ext4_bg_has_super(sb, bg)) has_super = 1; + return (has_super + ext4_group_first_block_no(sb, bg)); } @@ -2091,11 +2224,8 @@ static int parse_strtoul(const char *buf, { char *endp; - while (*buf && isspace(*buf)) - buf++; - *value = simple_strtoul(buf, &endp, 0); - while (*endp && isspace(*endp)) - endp++; + *value = simple_strtoul(skip_spaces(buf), &endp, 0); + endp = skip_spaces(endp); if (*endp || *value > max) return -EINVAL; @@ -2126,9 +2256,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, struct super_block *sb = sbi->s_buddy_cache->i_sb; return snprintf(buf, PAGE_SIZE, "%llu\n", - sbi->s_kbytes_written + + (unsigned long long)(sbi->s_kbytes_written + ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1)); + EXT4_SB(sb)->s_sectors_written_start) >> 1))); } static ssize_t inode_readahead_blks_store(struct ext4_attr *a, @@ -2148,7 +2278,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, } static ssize_t sbi_ui_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) + struct ext4_sb_info *sbi, char *buf) { unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); @@ -2189,24 +2319,28 @@ EXT4_RO_ATTR(session_write_kbytes); EXT4_RO_ATTR(lifetime_write_kbytes); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, inode_readahead_blks_store, s_inode_readahead_blks); +EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); +EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), ATTR_LIST(session_write_kbytes), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(inode_readahead_blks), + ATTR_LIST(inode_goal), ATTR_LIST(mb_stats), ATTR_LIST(mb_max_to_scan), ATTR_LIST(mb_min_to_scan), ATTR_LIST(mb_order2_req), ATTR_LIST(mb_stream_req), ATTR_LIST(mb_group_prealloc), + ATTR_LIST(max_writeback_mb_bump), NULL, }; @@ -2239,7 +2373,7 @@ static void ext4_sb_release(struct kobject *kobj) } -static struct sysfs_ops ext4_attr_ops = { +static const struct sysfs_ops ext4_attr_ops = { .show = ext4_attr_show, .store = ext4_attr_store, }; @@ -2250,10 +2384,52 @@ static struct kobj_type ext4_ktype = { .release = ext4_sb_release, }; +/* + * Check whether this filesystem can be mounted based on + * the features present and the RDONLY/RDWR mount requested. + * Returns 1 if this filesystem can be mounted as requested, + * 0 if it cannot be. + */ +static int ext4_feature_set_ok(struct super_block *sb, int readonly) +{ + if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { + ext4_msg(sb, KERN_ERR, + "Couldn't mount because of " + "unsupported optional features (%x)", + (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & + ~EXT4_FEATURE_INCOMPAT_SUPP)); + return 0; + } + + if (readonly) + return 1; + + /* Check that feature set is OK for a read-write mount */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { + ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " + "unsupported optional features (%x)", + (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & + ~EXT4_FEATURE_RO_COMPAT_SUPP)); + return 0; + } + /* + * Large file size enabled file system can only be mounted + * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF + */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { + if (sizeof(blkcnt_t) < sizeof(u64)) { + ext4_msg(sb, KERN_ERR, "Filesystem with huge files " + "cannot be mounted RDWR without " + "CONFIG_LBDAF"); + return 0; + } + } + return 1; +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) __releases(kernel_lock) __acquires(kernel_lock) - { struct buffer_head *bh; struct ext4_super_block *es = NULL; @@ -2272,7 +2448,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned int db_count; unsigned int i; int needs_recovery, has_huge_files; - int features; __u64 blocks_count; int err; unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; @@ -2304,7 +2479,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); if (!blocksize) { - printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); + ext4_msg(sb, KERN_ERR, "unable to set blocksize"); goto out_fail; } @@ -2320,7 +2495,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (!(bh = sb_bread(sb, logical_sb_block))) { - printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); + ext4_msg(sb, KERN_ERR, "unable to read superblock"); goto out_fail; } /* @@ -2338,8 +2513,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) def_mount_opts = le32_to_cpu(es->s_default_mount_opts); if (def_mount_opts & EXT4_DEFM_DEBUG) set_opt(sbi->s_mount_opt, DEBUG); - if (def_mount_opts & EXT4_DEFM_BSDGROUPS) + if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { + ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", + "2.6.38"); set_opt(sbi->s_mount_opt, GRPID); + } if (def_mount_opts & EXT4_DEFM_UID16) set_opt(sbi->s_mount_opt, NO_UID32); #ifdef CONFIG_EXT4_FS_XATTR @@ -2351,11 +2529,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, POSIX_ACL); #endif if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) - sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; + set_opt(sbi->s_mount_opt, JOURNAL_DATA); else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) - sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; + set_opt(sbi->s_mount_opt, ORDERED_DATA); else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) - sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; + set_opt(sbi->s_mount_opt, WRITEBACK_DATA); if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); @@ -2369,7 +2547,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; - sbi->s_mb_history_max = default_mb_history_length; set_opt(sbi->s_mount_opt, BARRIER); @@ -2377,75 +2554,45 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * enable delayed allocation by default * Use -o nodelalloc to turn it off */ - set_opt(sbi->s_mount_opt, DELALLOC); - + if (!IS_EXT3_SB(sb)) + set_opt(sbi->s_mount_opt, DELALLOC); if (!parse_options((char *) data, sb, &journal_devnum, &journal_ioprio, NULL, 0)) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) - printk(KERN_WARNING - "EXT4-fs warning: feature flags set on rev 0 fs, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, + "feature flags set on rev 0 fs, " + "running e2fsck is recommended"); /* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, * so there is a chance incompat flags are set on a rev 0 filesystem. */ - features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); - if (features) { - printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " - "unsupported optional features (%x).\n", sb->s_id, - (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & - ~EXT4_FEATURE_INCOMPAT_SUPP)); - goto failed_mount; - } - features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); - if (!(sb->s_flags & MS_RDONLY) && features) { - printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " - "unsupported optional features (%x).\n", sb->s_id, - (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & - ~EXT4_FEATURE_RO_COMPAT_SUPP)); + if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) goto failed_mount; - } - has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE); - if (has_huge_files) { - /* - * Large file size enabled file system can only be - * mount if kernel is build with CONFIG_LBD - */ - if (sizeof(root->i_blocks) < sizeof(u64) && - !(sb->s_flags & MS_RDONLY)) { - printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " - "files cannot be mounted read-write " - "without CONFIG_LBD.\n", sb->s_id); - goto failed_mount; - } - } + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) { - printk(KERN_ERR - "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", - blocksize, sb->s_id); + ext4_msg(sb, KERN_ERR, + "Unsupported filesystem blocksize %d", blocksize); goto failed_mount; } if (sb->s_blocksize != blocksize) { - /* Validate the filesystem blocksize */ if (!sb_set_blocksize(sb, blocksize)) { - printk(KERN_ERR "EXT4-fs: bad block size %d.\n", + ext4_msg(sb, KERN_ERR, "bad block size %d", blocksize); goto failed_mount; } @@ -2455,19 +2602,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) offset = do_div(logical_sb_block, blocksize); bh = sb_bread(sb, logical_sb_block); if (!bh) { - printk(KERN_ERR - "EXT4-fs: Can't read superblock on 2nd try.\n"); + ext4_msg(sb, KERN_ERR, + "Can't read superblock on 2nd try"); goto failed_mount; } es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); sbi->s_es = es; if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { - printk(KERN_ERR - "EXT4-fs: Magic mismatch, very weird !\n"); + ext4_msg(sb, KERN_ERR, + "Magic mismatch, very weird!"); goto failed_mount; } } + has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_HUGE_FILE); sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, has_huge_files); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); @@ -2481,30 +2630,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { - printk(KERN_ERR - "EXT4-fs: unsupported inode size: %d\n", + ext4_msg(sb, KERN_ERR, + "unsupported inode size: %d", sbi->s_inode_size); goto failed_mount; } if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); } + sbi->s_desc_size = le16_to_cpu(es->s_desc_size); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || sbi->s_desc_size > EXT4_MAX_DESC_SIZE || !is_power_of_2(sbi->s_desc_size)) { - printk(KERN_ERR - "EXT4-fs: unsupported descriptor size %lu\n", + ext4_msg(sb, KERN_ERR, + "unsupported descriptor size %lu", sbi->s_desc_size); goto failed_mount; } } else sbi->s_desc_size = EXT4_MIN_DESC_SIZE; + sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) goto cantfind_ext4; + sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); if (sbi->s_inodes_per_block == 0) goto cantfind_ext4; @@ -2515,6 +2667,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_mount_state = le16_to_cpu(es->s_state); sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); + for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; @@ -2532,25 +2685,31 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (sbi->s_blocks_per_group > blocksize * 8) { - printk(KERN_ERR - "EXT4-fs: #blocks per group too big: %lu\n", + ext4_msg(sb, KERN_ERR, + "#blocks per group too big: %lu", sbi->s_blocks_per_group); goto failed_mount; } if (sbi->s_inodes_per_group > blocksize * 8) { - printk(KERN_ERR - "EXT4-fs: #inodes per group too big: %lu\n", + ext4_msg(sb, KERN_ERR, + "#inodes per group too big: %lu", sbi->s_inodes_per_group); goto failed_mount; } - if (ext4_blocks_count(es) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { - printk(KERN_ERR "EXT4-fs: filesystem on %s:" - " too large to mount safely\n", sb->s_id); + /* + * Test whether we have more sectors than will fit in sector_t, + * and whether the max offset is addressable by the page cache. + */ + if ((ext4_blocks_count(es) > + (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || + (ext4_blocks_count(es) > + (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { + ext4_msg(sb, KERN_ERR, "filesystem" + " too large to mount safely on this system"); if (sizeof(sector_t) < 8) - printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " - "enabled\n"); + ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); + ret = -EFBIG; goto failed_mount; } @@ -2560,21 +2719,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* check blocks count against device size */ blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; if (blocks_count && ext4_blocks_count(es) > blocks_count) { - printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu " - "exceeds size of device (%llu blocks)\n", + ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " + "exceeds size of device (%llu blocks)", ext4_blocks_count(es), blocks_count); goto failed_mount; } - /* - * It makes no sense for the first data block to be beyond the end - * of the filesystem. - */ - if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { - printk(KERN_WARNING "EXT4-fs: bad geometry: first data" - "block %u is beyond end of filesystem (%llu)\n", - le32_to_cpu(es->s_first_data_block), - ext4_blocks_count(es)); + /* + * It makes no sense for the first data block to be beyond the end + * of the filesystem. + */ + if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data" + "block %u is beyond end of filesystem (%llu)", + le32_to_cpu(es->s_first_data_block), + ext4_blocks_count(es)); goto failed_mount; } blocks_count = (ext4_blocks_count(es) - @@ -2582,21 +2741,23 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_BLOCKS_PER_GROUP(sb) - 1); do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { - printk(KERN_WARNING "EXT4-fs: groups count too large: %u " + ext4_msg(sb, KERN_WARNING, "groups count too large: %u " "(block count %llu, first data block %u, " - "blocks per group %lu)\n", sbi->s_groups_count, + "blocks per group %lu)", sbi->s_groups_count, ext4_blocks_count(es), le32_to_cpu(es->s_first_data_block), EXT4_BLOCKS_PER_GROUP(sb)); goto failed_mount; } sbi->s_groups_count = blocks_count; + sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, + (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL); if (sbi->s_group_desc == NULL) { - printk(KERN_ERR "EXT4-fs: not enough memory\n"); + ext4_msg(sb, KERN_ERR, "not enough memory"); goto failed_mount; } @@ -2611,21 +2772,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) block = descriptor_loc(sb, logical_sb_block, i); sbi->s_group_desc[i] = sb_bread(sb, block); if (!sbi->s_group_desc[i]) { - printk(KERN_ERR "EXT4-fs: " - "can't read group descriptor %d\n", i); + ext4_msg(sb, KERN_ERR, + "can't read group descriptor %d", i); db_count = i; goto failed_mount2; } } if (!ext4_check_descriptors(sb)) { - printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); + ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); goto failed_mount2; } if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) if (!ext4_fill_flex_info(sb)) { - printk(KERN_ERR - "EXT4-fs: unable to initialize " - "flex_bg meta info!\n"); + ext4_msg(sb, KERN_ERR, + "unable to initialize " + "flex_bg meta info!"); goto failed_mount2; } @@ -2633,25 +2794,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); - err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext4_count_free_blocks(sb)); - if (!err) { - err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext4_count_free_inodes(sb)); - } - if (!err) { - err = percpu_counter_init(&sbi->s_dirs_counter, - ext4_count_dirs(sb)); - } - if (!err) { - err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); - } - if (err) { - printk(KERN_ERR "EXT4-fs: insufficient memory\n"); - goto failed_mount3; - } - sbi->s_stripe = ext4_get_stripe_size(sbi); + sbi->s_max_writeback_mb_bump = 128; /* * set up enough so that it can read an inode @@ -2685,31 +2829,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext4_load_journal(sb, es, journal_devnum)) goto failed_mount3; - if (!(sb->s_flags & MS_RDONLY) && - EXT4_SB(sb)->s_journal->j_failed_commit) { - printk(KERN_CRIT "EXT4-fs error (device %s): " - "ext4_fill_super: Journal transaction " - "%u is corrupt\n", sb->s_id, - EXT4_SB(sb)->s_journal->j_failed_commit); - if (test_opt(sb, ERRORS_RO)) { - printk(KERN_CRIT - "Mounting filesystem read-only\n"); - sb->s_flags |= MS_RDONLY; - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - } - if (test_opt(sb, ERRORS_PANIC)) { - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, 1); - goto failed_mount4; - } - } } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { - printk(KERN_ERR "EXT4-fs: required journal recovery " - "suppressed and not mounted read-only\n"); - goto failed_mount4; + ext4_msg(sb, KERN_ERR, "required journal recovery " + "suppressed and not mounted read-only"); + goto failed_mount_wq; } else { clear_opt(sbi->s_mount_opt, DATA_FLAGS); set_opt(sbi->s_mount_opt, WRITEBACK_DATA); @@ -2721,8 +2845,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (ext4_blocks_count(es) > 0xffffffffULL && !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT)) { - printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n"); - goto failed_mount4; + ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); + goto failed_mount_wq; } if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { @@ -2759,9 +2883,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) case EXT4_MOUNT_WRITEBACK_DATA: if (!jbd2_journal_check_available_features (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { - printk(KERN_ERR "EXT4-fs: Journal does not support " - "requested data journaling mode\n"); - goto failed_mount4; + ext4_msg(sb, KERN_ERR, "Journal does not support " + "requested data journaling mode"); + goto failed_mount_wq; } default: break; @@ -2769,14 +2893,38 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); no_journal: - + err = percpu_counter_init(&sbi->s_freeblocks_counter, + ext4_count_free_blocks(sb)); + if (!err) + err = percpu_counter_init(&sbi->s_freeinodes_counter, + ext4_count_free_inodes(sb)); + if (!err) + err = percpu_counter_init(&sbi->s_dirs_counter, + ext4_count_dirs(sb)); + if (!err) + err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); + if (err) { + ext4_msg(sb, KERN_ERR, "insufficient memory"); + goto failed_mount_wq; + } if (test_opt(sb, NOBH)) { if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { - printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " - "its supported only with writeback mode\n"); + ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " + "its supported only with writeback mode"); clear_opt(sbi->s_mount_opt, NOBH); } + if (test_opt(sb, DIOREAD_NOLOCK)) { + ext4_msg(sb, KERN_WARNING, "dioread_nolock option is " + "not supported with nobh mode"); + goto failed_mount_wq; + } + } + EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); + if (!EXT4_SB(sb)->dio_unwritten_wq) { + printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); + goto failed_mount_wq; } + /* * The jbd2_journal_load will have done any necessary log recovery, * so we can safely mount the rest of the filesystem now. @@ -2784,18 +2932,18 @@ no_journal: root = ext4_iget(sb, EXT4_ROOT_INO); if (IS_ERR(root)) { - printk(KERN_ERR "EXT4-fs: get root inode failed\n"); + ext4_msg(sb, KERN_ERR, "get root inode failed"); ret = PTR_ERR(root); goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { iput(root); - printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); + ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); goto failed_mount4; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { - printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); + ext4_msg(sb, KERN_ERR, "get root dentry failed"); iput(root); ret = -ENOMEM; goto failed_mount4; @@ -2824,29 +2972,41 @@ no_journal: sbi->s_inode_size) { sbi->s_want_extra_isize = sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE; - printk(KERN_INFO "EXT4-fs: required extra inode space not" - "available.\n"); + ext4_msg(sb, KERN_INFO, "required extra inode space not" + "available"); } - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { - printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " - "requested data journaling mode\n"); + if (test_opt(sb, DELALLOC) && + (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { + ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " + "requested data journaling mode"); clear_opt(sbi->s_mount_opt, DELALLOC); - } else if (test_opt(sb, DELALLOC)) - printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); + } + if (test_opt(sb, DIOREAD_NOLOCK)) { + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { + ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " + "option - requested data journaling mode"); + clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + } + if (sb->s_blocksize < PAGE_SIZE) { + ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " + "option - block size is too small"); + clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + } + } err = ext4_setup_system_zone(sb); if (err) { - printk(KERN_ERR "EXT4-fs: failed to initialize system " - "zone (%d)\n", err); + ext4_msg(sb, KERN_ERR, "failed to initialize system " + "zone (%d)\n", err); goto failed_mount4; } ext4_ext_init(sb); err = ext4_mb_init(sb, needs_recovery); if (err) { - printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", - err); + ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", + err); goto failed_mount4; } @@ -2864,7 +3024,7 @@ no_journal: ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; if (needs_recovery) { - printk(KERN_INFO "EXT4-fs: recovery complete.\n"); + ext4_msg(sb, KERN_INFO, "recovery complete"); ext4_mark_recovery_complete(sb, es); } if (EXT4_SB(sb)->s_journal) { @@ -2877,25 +3037,29 @@ no_journal: } else descr = "out journal"; - printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n", - sb->s_id, descr); + ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); lock_kernel(); return 0; cantfind_ext4: if (!silent) - printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", - sb->s_id); + ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); goto failed_mount; failed_mount4: - printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "mount failed"); + destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); +failed_mount_wq: ext4_release_system_zone(sb); if (sbi->s_journal) { jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; } + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); + percpu_counter_destroy(&sbi->s_dirtyblocks_counter); failed_mount3: if (sbi->s_flex_groups) { if (is_vmalloc_addr(sbi->s_flex_groups)) @@ -2903,10 +3067,6 @@ failed_mount3: else kfree(sbi->s_flex_groups); } - percpu_counter_destroy(&sbi->s_freeblocks_counter); - percpu_counter_destroy(&sbi->s_freeinodes_counter); - percpu_counter_destroy(&sbi->s_dirs_counter); - percpu_counter_destroy(&sbi->s_dirtyblocks_counter); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); @@ -2968,27 +3128,27 @@ static journal_t *ext4_get_journal(struct super_block *sb, journal_inode = ext4_iget(sb, journal_inum); if (IS_ERR(journal_inode)) { - printk(KERN_ERR "EXT4-fs: no journal found.\n"); + ext4_msg(sb, KERN_ERR, "no journal found"); return NULL; } if (!journal_inode->i_nlink) { make_bad_inode(journal_inode); iput(journal_inode); - printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); + ext4_msg(sb, KERN_ERR, "journal inode is deleted"); return NULL; } jbd_debug(2, "Journal inode found at %p: %lld bytes\n", journal_inode, journal_inode->i_size); if (!S_ISREG(journal_inode->i_mode)) { - printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); + ext4_msg(sb, KERN_ERR, "invalid journal inode"); iput(journal_inode); return NULL; } journal = jbd2_journal_init_inode(journal_inode); if (!journal) { - printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); + ext4_msg(sb, KERN_ERR, "Could not load journal inode"); iput(journal_inode); return NULL; } @@ -3012,22 +3172,22 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); - bdev = ext4_blkdev_get(j_dev); + bdev = ext4_blkdev_get(j_dev, sb); if (bdev == NULL) return NULL; if (bd_claim(bdev, sb)) { - printk(KERN_ERR - "EXT4-fs: failed to claim external journal device.\n"); + ext4_msg(sb, KERN_ERR, + "failed to claim external journal device"); blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return NULL; } blocksize = sb->s_blocksize; - hblock = bdev_hardsect_size(bdev); + hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { - printk(KERN_ERR - "EXT4-fs: blocksize too small for journal device.\n"); + ext4_msg(sb, KERN_ERR, + "blocksize too small for journal device"); goto out_bdev; } @@ -3035,8 +3195,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, offset = EXT4_MIN_BLOCK_SIZE % blocksize; set_blocksize(bdev, blocksize); if (!(bh = __bread(bdev, sb_block, blocksize))) { - printk(KERN_ERR "EXT4-fs: couldn't read superblock of " - "external journal\n"); + ext4_msg(sb, KERN_ERR, "couldn't read superblock of " + "external journal"); goto out_bdev; } @@ -3044,14 +3204,14 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || !(le32_to_cpu(es->s_feature_incompat) & EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { - printk(KERN_ERR "EXT4-fs: external journal has " - "bad superblock\n"); + ext4_msg(sb, KERN_ERR, "external journal has " + "bad superblock"); brelse(bh); goto out_bdev; } if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { - printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); + ext4_msg(sb, KERN_ERR, "journal UUID does not match"); brelse(bh); goto out_bdev; } @@ -3063,25 +3223,26 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, journal = jbd2_journal_init_dev(bdev, sb->s_bdev, start, len, blocksize); if (!journal) { - printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); + ext4_msg(sb, KERN_ERR, "failed to create device journal"); goto out_bdev; } journal->j_private = sb; ll_rw_block(READ, 1, &journal->j_sb_buffer); wait_on_buffer(journal->j_sb_buffer); if (!buffer_uptodate(journal->j_sb_buffer)) { - printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); + ext4_msg(sb, KERN_ERR, "I/O error on journal device"); goto out_journal; } if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { - printk(KERN_ERR "EXT4-fs: External journal has more than one " - "user (unsupported) - %d\n", + ext4_msg(sb, KERN_ERR, "External journal has more than one " + "user (unsupported) - %d", be32_to_cpu(journal->j_superblock->s_nr_users)); goto out_journal; } EXT4_SB(sb)->journal_bdev = bdev; ext4_init_journal_params(sb, journal); return journal; + out_journal: jbd2_journal_destroy(journal); out_bdev: @@ -3103,8 +3264,8 @@ static int ext4_load_journal(struct super_block *sb, if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { - printk(KERN_INFO "EXT4-fs: external journal device major/minor " - "numbers have changed\n"); + ext4_msg(sb, KERN_INFO, "external journal device major/minor " + "numbers have changed"); journal_dev = new_decode_dev(journal_devnum); } else journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); @@ -3116,24 +3277,23 @@ static int ext4_load_journal(struct super_block *sb, * crash? For recovery, we need to check in advance whether we * can get read-write access to the device. */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { if (sb->s_flags & MS_RDONLY) { - printk(KERN_INFO "EXT4-fs: INFO: recovery " - "required on readonly filesystem.\n"); + ext4_msg(sb, KERN_INFO, "INFO: recovery " + "required on readonly filesystem"); if (really_read_only) { - printk(KERN_ERR "EXT4-fs: write access " - "unavailable, cannot proceed.\n"); + ext4_msg(sb, KERN_ERR, "write access " + "unavailable, cannot proceed"); return -EROFS; } - printk(KERN_INFO "EXT4-fs: write access will " - "be enabled during recovery.\n"); + ext4_msg(sb, KERN_INFO, "write access will " + "be enabled during recovery"); } } if (journal_inum && journal_dev) { - printk(KERN_ERR "EXT4-fs: filesystem has both journal " - "and inode journals!\n"); + ext4_msg(sb, KERN_ERR, "filesystem has both journal " + "and inode journals!"); return -EINVAL; } @@ -3145,15 +3305,13 @@ static int ext4_load_journal(struct super_block *sb, return -EINVAL; } - if (journal->j_flags & JBD2_BARRIER) - printk(KERN_INFO "EXT4-fs: barriers enabled\n"); - else - printk(KERN_INFO "EXT4-fs: barriers disabled\n"); + if (!(journal->j_flags & JBD2_BARRIER)) + ext4_msg(sb, KERN_INFO, "barriers disabled"); if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { err = jbd2_journal_update_format(journal); if (err) { - printk(KERN_ERR "EXT4-fs: error updating journal.\n"); + ext4_msg(sb, KERN_ERR, "error updating journal"); jbd2_journal_destroy(journal); return err; } @@ -3165,7 +3323,7 @@ static int ext4_load_journal(struct super_block *sb, err = jbd2_journal_load(journal); if (err) { - printk(KERN_ERR "EXT4-fs: error loading journal.\n"); + ext4_msg(sb, KERN_ERR, "error loading journal"); jbd2_journal_destroy(journal); return err; } @@ -3201,12 +3359,23 @@ static int ext4_commit_super(struct super_block *sb, int sync) * be remapped. Nothing we can do but to retry the * write and hope for the best. */ - printk(KERN_ERR "EXT4-fs: previous I/O error to " - "superblock detected for %s.\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "previous I/O error to " + "superblock detected"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } - es->s_wtime = cpu_to_le32(get_seconds()); + /* + * If the file system is mounted read-only, don't update the + * superblock write time. This avoids updating the superblock + * write time when we are mounting the root file system + * read/only but we need to replay the journal; at that point, + * for people who are east of GMT and who make their clock + * tick in localtime for Windows bug-for-bug compatibility, + * the clock is set in the future, and this will cause e2fsck + * to complain and force a full file system check. + */ + if (!(sb->s_flags & MS_RDONLY)) + es->s_wtime = cpu_to_le32(get_seconds()); es->s_kbytes_written = cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - @@ -3225,8 +3394,8 @@ static int ext4_commit_super(struct super_block *sb, int sync) error = buffer_write_io_error(sbh); if (error) { - printk(KERN_ERR "EXT4-fs: I/O error while writing " - "superblock for %s.\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "I/O error while writing " + "superblock"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } @@ -3234,7 +3403,6 @@ static int ext4_commit_super(struct super_block *sb, int sync) return error; } - /* * Have we just finished recovery? If so, and if we are mounting (or * remounting) the filesystem readonly, then we will end up with a @@ -3289,10 +3457,9 @@ static void ext4_clear_journal_err(struct super_block *sb, char nbuf[16]; errstr = ext4_decode_error(sb, j_errno, nbuf); - ext4_warning(sb, __func__, "Filesystem error recorded " + ext4_warning(sb, "Filesystem error recorded " "from previous mount: %s", errstr); - ext4_warning(sb, __func__, "Marking fs in need of " - "filesystem check."); + ext4_warning(sb, "Marking fs in need of filesystem check."); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); @@ -3315,26 +3482,32 @@ int ext4_force_commit(struct super_block *sb) return 0; journal = EXT4_SB(sb)->s_journal; - if (journal) + if (journal) { + vfs_check_frozen(sb, SB_FREEZE_WRITE); ret = ext4_journal_force_commit(journal); + } return ret; } static void ext4_write_super(struct super_block *sb) { + lock_super(sb); ext4_commit_super(sb, 1); + unlock_super(sb); } static int ext4_sync_fs(struct super_block *sb, int wait) { int ret = 0; tid_t target; + struct ext4_sb_info *sbi = EXT4_SB(sb); - trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); - if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { + trace_ext4_sync_fs(sb, wait); + flush_workqueue(sbi->dio_unwritten_wq); + if (jbd2_journal_start_commit(sbi->s_journal, &target)) { if (wait) - jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); + jbd2_log_wait_commit(sbi->s_journal, target); } return ret; } @@ -3361,18 +3534,16 @@ static int ext4_freeze(struct super_block *sb) * the journal. */ error = jbd2_journal_flush(journal); - if (error < 0) { - out: - jbd2_journal_unlock_updates(journal); - return error; - } + if (error < 0) + goto out; /* Journal blocked and flushed, clear needs_recovery flag. */ EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); error = ext4_commit_super(sb, 1); - if (error) - goto out; - return 0; +out: + /* we rely on s_frozen to stop further updates */ + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + return error; } /* @@ -3389,7 +3560,6 @@ static int ext4_unfreeze(struct super_block *sb) EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); ext4_commit_super(sb, 1); unlock_super(sb); - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); return 0; } @@ -3407,7 +3577,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) int i; #endif + lock_kernel(); + /* Store the original options */ + lock_super(sb); old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; old_opts.s_resuid = sbi->s_resuid; @@ -3432,11 +3605,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) + if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) ext4_abort(sb, __func__, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); es = sbi->s_es; @@ -3447,7 +3620,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || n_blocks_count > ext4_blocks_count(es)) { - if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { + if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; goto restore_opts; } @@ -3471,31 +3644,22 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal) ext4_mark_recovery_complete(sb, es); } else { - int ret; - if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, - ~EXT4_FEATURE_RO_COMPAT_SUPP))) { - printk(KERN_WARNING "EXT4-fs: %s: couldn't " - "remount RDWR because of unsupported " - "optional features (%x).\n", sb->s_id, - (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & - ~EXT4_FEATURE_RO_COMPAT_SUPP)); + /* Make sure we can mount this feature set readwrite */ + if (!ext4_feature_set_ok(sb, 0)) { err = -EROFS; goto restore_opts; } - /* * Make sure the group descriptor checksums - * are sane. If they aren't, refuse to - * remount r/w. + * are sane. If they aren't, refuse to remount r/w. */ for (g = 0; g < sbi->s_groups_count; g++) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, g, NULL); if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { - printk(KERN_ERR - "EXT4-fs: ext4_remount: " - "Checksum for group %u failed (%u!=%u)\n", + ext4_msg(sb, KERN_ERR, + "ext4_remount: Checksum for group %u failed (%u!=%u)", g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), le16_to_cpu(gdp->bg_checksum)); err = -EINVAL; @@ -3509,11 +3673,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) * require a full umount/remount for now. */ if (es->s_last_orphan) { - printk(KERN_WARNING "EXT4-fs: %s: couldn't " + ext4_msg(sb, KERN_WARNING, "Couldn't " "remount RDWR because of unprocessed " "orphan inode list. Please " - "umount/remount instead.\n", - sb->s_id); + "umount/remount instead"); err = -EINVAL; goto restore_opts; } @@ -3544,7 +3707,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) old_opts.s_qf_names[i] != sbi->s_qf_names[i]) kfree(old_opts.s_qf_names[i]); #endif + unlock_super(sb); + unlock_kernel(); return 0; + restore_opts: sb->s_flags = old_sb_flags; sbi->s_mount_opt = old_opts.s_mount_opt; @@ -3562,6 +3728,8 @@ restore_opts: sbi->s_qf_names[i] = old_opts.s_qf_names[i]; } #endif + unlock_super(sb); + unlock_kernel(); return err; } @@ -3616,27 +3784,26 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); - ext4_free_blocks_count_set(es, buf->f_bfree); buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); if (buf->f_bfree < ext4_r_blocks_count(es)) buf->f_bavail = 0; buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); - es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); buf->f_namelen = EXT4_NAME_LEN; fsid = le64_to_cpup((void *)es->s_uuid) ^ le64_to_cpup((void *)es->s_uuid + sizeof(u64)); buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; + return 0; } -/* Helper function for writing quotas on sync - we need to start transaction before quota file - * is locked for write. Otherwise the are possible deadlocks: +/* Helper function for writing quotas on sync - we need to start transaction + * before quota file is locked for write. Otherwise the are possible deadlocks: * Process 1 Process 2 * ext4_create() quota_sync() * jbd2_journal_start() write_dquot() - * vfs_dq_init() down(dqio_mutex) + * dquot_initialize() down(dqio_mutex) * down(dqio_mutex) jbd2_journal_start() * */ @@ -3656,7 +3823,7 @@ static int ext4_write_dquot(struct dquot *dquot) inode = dquot_to_inode(dquot); handle = ext4_journal_start(inode, - EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); @@ -3672,7 +3839,7 @@ static int ext4_acquire_dquot(struct dquot *dquot) handle_t *handle; handle = ext4_journal_start(dquot_to_inode(dquot), - EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); @@ -3688,7 +3855,7 @@ static int ext4_release_dquot(struct dquot *dquot) handle_t *handle; handle = ext4_journal_start(dquot_to_inode(dquot), - EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) { /* Release dquot anyway to avoid endless cycle in dqput() */ dquot_release(dquot); @@ -3736,7 +3903,7 @@ static int ext4_write_info(struct super_block *sb, int type) static int ext4_quota_on_mount(struct super_block *sb, int type) { return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], - EXT4_SB(sb)->s_jquota_fmt, type); + EXT4_SB(sb)->s_jquota_fmt, type); } /* @@ -3767,9 +3934,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (EXT4_SB(sb)->s_qf_names[type]) { /* Quotafile not in fs root? */ if (path.dentry->d_parent != sb->s_root) - printk(KERN_WARNING - "EXT4-fs: Quota file not on filesystem root. " - "Journaled quota will not work.\n"); + ext4_msg(sb, KERN_WARNING, + "Quota file not on filesystem root. " + "Journaled quota will not work"); } /* @@ -3845,102 +4012,126 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); - int tocopy; int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; - size_t towrite = len; struct buffer_head *bh; handle_t *handle = journal_current_handle(); if (EXT4_SB(sb)->s_journal && !handle) { - printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" - " cancelled because transaction is not started.\n", + ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" + " cancelled because transaction is not started", + (unsigned long long)off, (unsigned long long)len); + return -EIO; + } + /* + * Since we account only one data block in transaction credits, + * then it is impossible to cross a block boundary. + */ + if (sb->s_blocksize - offset < len) { + ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" + " cancelled because not block aligned", (unsigned long long)off, (unsigned long long)len); return -EIO; } + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); - while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? - sb->s_blocksize - offset : towrite; - bh = ext4_bread(handle, inode, blk, 1, &err); - if (!bh) + bh = ext4_bread(handle, inode, blk, 1, &err); + if (!bh) + goto out; + if (journal_quota) { + err = ext4_journal_get_write_access(handle, bh); + if (err) { + brelse(bh); goto out; - if (journal_quota) { - err = ext4_journal_get_write_access(handle, bh); - if (err) { - brelse(bh); - goto out; - } - } - lock_buffer(bh); - memcpy(bh->b_data+offset, data, tocopy); - flush_dcache_page(bh->b_page); - unlock_buffer(bh); - if (journal_quota) - err = ext4_handle_dirty_metadata(handle, NULL, bh); - else { - /* Always do at least ordered writes for quotas */ - err = ext4_jbd2_file_inode(handle, inode); - mark_buffer_dirty(bh); } - brelse(bh); - if (err) - goto out; - offset = 0; - towrite -= tocopy; - data += tocopy; - blk++; } + lock_buffer(bh); + memcpy(bh->b_data+offset, data, len); + flush_dcache_page(bh->b_page); + unlock_buffer(bh); + if (journal_quota) + err = ext4_handle_dirty_metadata(handle, NULL, bh); + else { + /* Always do at least ordered writes for quotas */ + err = ext4_jbd2_file_inode(handle, inode); + mark_buffer_dirty(bh); + } + brelse(bh); out: - if (len == towrite) { + if (err) { mutex_unlock(&inode->i_mutex); return err; } - if (inode->i_size < off+len-towrite) { - i_size_write(inode, off+len-towrite); + if (inode->i_size < off + len) { + i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; } inode->i_mtime = inode->i_ctime = CURRENT_TIME; ext4_mark_inode_dirty(handle, inode); mutex_unlock(&inode->i_mutex); - return len - towrite; + return len; } #endif -static int ext4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static int ext4_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); + return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); } -static struct file_system_type ext4_fs_type = { +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, - .name = "ext4", + .name = "ext2", .get_sb = ext4_get_sb, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; -#ifdef CONFIG_EXT4DEV_COMPAT -static int ext4dev_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static inline void register_as_ext2(void) +{ + int err = register_filesystem(&ext2_fs_type); + if (err) + printk(KERN_WARNING + "EXT4-fs: Unable to register as ext2 (%d)\n", err); +} + +static inline void unregister_as_ext2(void) +{ + unregister_filesystem(&ext2_fs_type); +} +MODULE_ALIAS("ext2"); +#else +static inline void register_as_ext2(void) { } +static inline void unregister_as_ext2(void) { } +#endif + +#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static inline void register_as_ext3(void) +{ + int err = register_filesystem(&ext3_fs_type); + if (err) + printk(KERN_WARNING + "EXT4-fs: Unable to register as ext3 (%d)\n", err); +} + +static inline void unregister_as_ext3(void) { - printk(KERN_WARNING "EXT4-fs: Update your userspace programs " - "to mount using ext4\n"); - printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " - "will go away by 2.6.31\n"); - return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); + unregister_filesystem(&ext3_fs_type); } +MODULE_ALIAS("ext3"); +#else +static inline void register_as_ext3(void) { } +static inline void unregister_as_ext3(void) { } +#endif -static struct file_system_type ext4dev_fs_type = { +static struct file_system_type ext4_fs_type = { .owner = THIS_MODULE, - .name = "ext4dev", - .get_sb = ext4dev_get_sb, + .name = "ext4", + .get_sb = ext4_get_sb, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; -MODULE_ALIAS("ext4dev"); -#endif static int __init init_ext4_fs(void) { @@ -3963,18 +4154,15 @@ static int __init init_ext4_fs(void) err = init_inodecache(); if (err) goto out1; + register_as_ext2(); + register_as_ext3(); err = register_filesystem(&ext4_fs_type); if (err) goto out; -#ifdef CONFIG_EXT4DEV_COMPAT - err = register_filesystem(&ext4dev_fs_type); - if (err) { - unregister_filesystem(&ext4_fs_type); - goto out; - } -#endif return 0; out: + unregister_as_ext2(); + unregister_as_ext3(); destroy_inodecache(); out1: exit_ext4_xattr(); @@ -3990,10 +4178,9 @@ out4: static void __exit exit_ext4_fs(void) { + unregister_as_ext2(); + unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); -#ifdef CONFIG_EXT4DEV_COMPAT - unregister_filesystem(&ext4dev_fs_type); -#endif destroy_inodecache(); exit_ext4_xattr(); exit_ext4_mballoc();